From 00398e1d518309328e8ba7dff00881538ac22c6a Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 19:50:41 +0000 Subject: Bluetooth: Add support for BT_PKT_STATUS CMSG data for SCO connections This change adds support for reporting the BT_PKT_STATUS to the socket CMSG data to allow the implementation of a packet loss correction on erroneous data received on the SCO socket. The patch was partially developed by Marcel Holtmann and validated by Hsin-yu Chao. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 10 ++++++++++ include/net/bluetooth/sco.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 18190055374c..7ee8041af803 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -147,6 +147,10 @@ struct bt_voice { #define BT_MODE_LE_FLOWCTL 0x03 #define BT_MODE_EXT_FLOWCTL 0x04 +#define BT_PKT_STATUS 16 + +#define BT_SCM_PKT_STATUS 0x03 + __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) @@ -286,6 +290,7 @@ struct bt_sock { struct sock *parent; unsigned long flags; void (*skb_msg_name)(struct sk_buff *, void *, int *); + void (*skb_put_cmsg)(struct sk_buff *, struct msghdr *, struct sock *); }; enum { @@ -335,6 +340,10 @@ struct l2cap_ctrl { struct l2cap_chan *chan; }; +struct sco_ctrl { + u8 pkt_status; +}; + struct hci_dev; typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); @@ -361,6 +370,7 @@ struct bt_skb_cb { u8 incoming:1; union { struct l2cap_ctrl l2cap; + struct sco_ctrl sco; struct hci_ctrl hci; }; }; diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h index f40ddb4264fc..1aa2e14b6c94 100644 --- a/include/net/bluetooth/sco.h +++ b/include/net/bluetooth/sco.h @@ -46,4 +46,6 @@ struct sco_conninfo { __u8 dev_class[3]; }; +#define SCO_CMSG_PKT_STATUS 0x01 + #endif /* __SCO_H */ -- cgit v1.2.3 From 32929e1f4ad9adf71f655028e4dd5d87adb97f52 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 14:26:10 +0000 Subject: Bluetooth: Use only 8 bits for the HCI CMSG state flags This change implements suggestions from the code review of the SCO CMSG state flag patch. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9352bb1bf34c..9949870f7d78 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -31,8 +31,8 @@ #define HCI_TIME_STAMP 3 /* CMSG flags */ -#define HCI_CMSG_DIR 0x0001 -#define HCI_CMSG_TSTAMP 0x0002 +#define HCI_CMSG_DIR 0x01 +#define HCI_CMSG_TSTAMP 0x02 struct sockaddr_hci { sa_family_t hci_family; -- cgit v1.2.3 From 7e90de4ac1099d3f4e26023853d4aefd0d2a1dea Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 02:01:55 +0000 Subject: Bluetooth: mgmt: read/set system parameter definitions This patch submits the corresponding kernel definitions to mgmt.h. This is submitted before the implementation to avoid any conflicts in values allocations. Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Yu Liu Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 16e0d87bd8fa..e515288f328f 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -52,6 +52,12 @@ struct mgmt_hdr { __le16 len; } __packed; +struct mgmt_tlv { + __le16 type; + __u8 length; + __u8 value[]; +} __packed; + struct mgmt_addr_info { bdaddr_t bdaddr; __u8 type; @@ -702,6 +708,18 @@ struct mgmt_rp_set_exp_feature { __le32 flags; } __packed; +#define MGMT_OP_READ_DEF_SYSTEM_CONFIG 0x004b +#define MGMT_READ_DEF_SYSTEM_CONFIG_SIZE 0 + +#define MGMT_OP_SET_DEF_SYSTEM_CONFIG 0x004c +#define MGMT_SET_DEF_SYSTEM_CONFIG_SIZE 0 + +#define MGMT_OP_READ_DEF_RUNTIME_CONFIG 0x004d +#define MGMT_READ_DEF_RUNTIME_CONFIG_SIZE 0 + +#define MGMT_OP_SET_DEF_RUNTIME_CONFIG 0x004e +#define MGMT_SET_DEF_RUNTIME_CONFIG_SIZE 0 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; -- cgit v1.2.3 From 10873f99ced274cbfc119f55e7e57a0f047a0799 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 02:01:56 +0000 Subject: Bluetooth: centralize default value initialization. This patch centralized the initialization of default parameters. This is required to allow clients to more easily customize the default system parameters. Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cdd4f1db8670..0d5dbb6cb5a0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -295,6 +295,14 @@ struct hci_dev { __u8 le_scan_type; __u16 le_scan_interval; __u16 le_scan_window; + __u16 le_scan_int_suspend; + __u16 le_scan_window_suspend; + __u16 le_scan_int_discovery; + __u16 le_scan_window_discovery; + __u16 le_scan_int_adv_monitor; + __u16 le_scan_window_adv_monitor; + __u16 le_scan_int_connect; + __u16 le_scan_window_connect; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_latency; @@ -323,6 +331,16 @@ struct hci_dev { __u16 devid_product; __u16 devid_version; + __u8 def_page_scan_type; + __u16 def_page_scan_int; + __u16 def_page_scan_window; + __u8 def_inq_scan_type; + __u16 def_inq_scan_int; + __u16 def_inq_scan_window; + __u16 def_br_lsto; + __u16 def_page_timeout; + __u16 def_multi_adv_rotation_duration; + __u16 pkt_type; __u16 esco_type; __u16 link_policy; -- cgit v1.2.3 From 8baaa4038edbff67f318574e233e9e7e43808230 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:08 +0200 Subject: Bluetooth: Add bdaddr_list_with_flags for classic whitelist In order to more easily add device flags to classic devices, create a new type of bdaddr_list that supports setting flags. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0d5dbb6cb5a0..95a3935325bb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -136,6 +136,13 @@ struct bdaddr_list_with_irk { u8 local_irk[16]; }; +struct bdaddr_list_with_flags { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + u32 current_flags; +}; + struct bt_uuid { struct list_head list; u8 uuid[16]; @@ -1169,12 +1176,19 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( struct list_head *list, bdaddr_t *bdaddr, u8 type); +struct bdaddr_list_with_flags * +hci_bdaddr_list_lookup_with_flags(struct list_head *list, bdaddr_t *bdaddr, + u8 type); int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, - u8 type, u8 *peer_irk, u8 *local_irk); + u8 type, u8 *peer_irk, u8 *local_irk); +int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, + u8 type, u32 flags); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, - u8 type); + u8 type); +int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr, + u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, -- cgit v1.2.3 From 7a92906f841db46a91df0179459ad8b2052f2e54 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:09 +0200 Subject: Bluetooth: Replace wakeable list with flag Since the classic device list now supports flags, convert the wakeable list into a flag on the existing device list. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 95a3935325bb..0643c737ba85 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -143,6 +143,16 @@ struct bdaddr_list_with_flags { u32 current_flags; }; +enum hci_conn_flags { + HCI_CONN_FLAG_REMOTE_WAKEUP, + HCI_CONN_FLAG_MAX +}; + +#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) + +/* Make sure number of flags doesn't exceed sizeof(current_flags) */ +static_assert(HCI_CONN_FLAG_MAX < 32); + struct bt_uuid { struct list_head list; u8 uuid[16]; @@ -463,7 +473,6 @@ struct hci_dev { struct list_head mgmt_pending; struct list_head blacklist; struct list_head whitelist; - struct list_head wakeable; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; -- cgit v1.2.3 From a1fc7535ec34a5904abe93dd42a6ed7e31c36717 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:10 +0200 Subject: Bluetooth: Replace wakeable in hci_conn_params Replace the wakeable boolean with flags in hci_conn_params and all users of this boolean. This will be used by the get/set device flags mgmt op. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0643c737ba85..6f88e5d81bd2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -660,7 +660,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - bool wakeable; + u32 current_flags; }; extern struct list_head hci_dev_list; -- cgit v1.2.3 From 4c54bf2b093bb2ae95e756342646d868e8101cb4 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:11 +0200 Subject: Bluetooth: Add get/set device flags mgmt op Add the get device flags and set device flags mgmt ops and the device flags changed event. Their behavior is described in detail in mgmt-api.txt in bluez. Sample btmon trace when a HID device is added (trimmed to 75 chars): @ MGMT Command: Unknown (0x0050) plen 11 {0x0001} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 ........... @ MGMT Event: Unknown (0x002a) plen 15 {0x0004} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0003} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0002} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Command Compl.. (0x0001) plen 10 {0x0001} [hci0] 18:06:14.98 Unknown (0x0050) plen 7 Status: Success (0x00) 90 c5 13 cd f3 cd 02 ....... @ MGMT Command: Add Device (0x0033) plen 8 {0x0001} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0004} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0003} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0002} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Unknown (0x002a) plen 15 {0x0004} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0003} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0002} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0001} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/mgmt.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index e515288f328f..8e47b0c5fe52 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -720,6 +720,27 @@ struct mgmt_rp_set_exp_feature { #define MGMT_OP_SET_DEF_RUNTIME_CONFIG 0x004e #define MGMT_SET_DEF_RUNTIME_CONFIG_SIZE 0 +#define MGMT_OP_GET_DEVICE_FLAGS 0x004F +#define MGMT_GET_DEVICE_FLAGS_SIZE 7 +struct mgmt_cp_get_device_flags { + struct mgmt_addr_info addr; +} __packed; +struct mgmt_rp_get_device_flags { + struct mgmt_addr_info addr; + __le32 supported_flags; + __le32 current_flags; +} __packed; + +#define MGMT_OP_SET_DEVICE_FLAGS 0x0050 +#define MGMT_SET_DEVICE_FLAGS_SIZE 11 +struct mgmt_cp_set_device_flags { + struct mgmt_addr_info addr; + __le32 current_flags; +} __packed; +struct mgmt_rp_set_device_flags { + struct mgmt_addr_info addr; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -951,3 +972,10 @@ struct mgmt_ev_exp_feature_changed { __u8 uuid[16]; __le32 flags; } __packed; + +#define MGMT_EV_DEVICE_FLAGS_CHANGED 0x002a +struct mgmt_ev_device_flags_changed { + struct mgmt_addr_info addr; + __le32 supported_flags; + __le32 current_flags; +} __packed; -- cgit v1.2.3 From 7fceb17c6b480e0f2bd0e566a8231039fb8a809e Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:12 +0200 Subject: Bluetooth: Add definitions for advertisement monitor features This adds support for Advertisement Monitor API. Here are the commands and events added. - Read Advertisement Monitor Feature command - Add Advertisement Pattern Monitor command - Remove Advertisement Monitor command - Advertisement Monitor Added event - Advertisement Monitor Removed event Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/mgmt.h | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 8e47b0c5fe52..beae5c3980f0 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -741,6 +741,45 @@ struct mgmt_rp_set_device_flags { struct mgmt_addr_info addr; } __packed; +#define MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS BIT(0) + +#define MGMT_OP_READ_ADV_MONITOR_FEATURES 0x0051 +#define MGMT_READ_ADV_MONITOR_FEATURES_SIZE 0 +struct mgmt_rp_read_adv_monitor_features { + __le32 supported_features; + __le32 enabled_features; + __le16 max_num_handles; + __u8 max_num_patterns; + __le16 num_handles; + __le16 handles[]; +} __packed; + +struct mgmt_adv_pattern { + __u8 ad_type; + __u8 offset; + __u8 length; + __u8 value[31]; +} __packed; + +#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 +struct mgmt_cp_add_adv_patterns_monitor { + __u8 pattern_count; + struct mgmt_adv_pattern patterns[]; +} __packed; +#define MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE 1 +struct mgmt_rp_add_adv_patterns_monitor { + __le16 monitor_handle; +} __packed; + +#define MGMT_OP_REMOVE_ADV_MONITOR 0x0053 +struct mgmt_cp_remove_adv_monitor { + __le16 monitor_handle; +} __packed; +#define MGMT_REMOVE_ADV_MONITOR_SIZE 2 +struct mgmt_rp_remove_adv_monitor { + __le16 monitor_handle; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -979,3 +1018,13 @@ struct mgmt_ev_device_flags_changed { __le32 supported_flags; __le32 current_flags; } __packed; + +#define MGMT_EV_ADV_MONITOR_ADDED 0x002b +struct mgmt_ev_adv_monitor_added { + __le16 monitor_handle; +} __packed; + +#define MGMT_EV_ADV_MONITOR_REMOVED 0x002c +struct mgmt_ev_adv_monitor_removed { + __le16 monitor_handle; +} __packed; -- cgit v1.2.3 From e5e1e7fd470ccf2eb38ab7fb5a3ab0fc4792fe53 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:13 +0200 Subject: Bluetooth: Add handler of MGMT_OP_READ_ADV_MONITOR_FEATURES This adds the request handler of MGMT_OP_READ_ADV_MONITOR_FEATURES command. Since the controller-based monitoring is not yet in place, this report only the supported features but not the enabled features. The following test was performed. - Issuing btmgmt advmon-features. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6f88e5d81bd2..4e9d51087674 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -25,6 +25,7 @@ #ifndef __HCI_CORE_H #define __HCI_CORE_H +#include #include #include @@ -237,6 +238,24 @@ struct adv_info { #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 +struct adv_pattern { + struct list_head list; + __u8 ad_type; + __u8 offset; + __u8 length; + __u8 value[HCI_MAX_AD_LENGTH]; +}; + +struct adv_monitor { + struct list_head patterns; + bool active; + __u16 handle; +}; + +#define HCI_MIN_ADV_MONITOR_HANDLE 1 +#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 +#define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16 + #define HCI_MAX_SHORT_NAME_LENGTH 10 /* Min encryption key size to match with SMP */ @@ -511,6 +530,9 @@ struct hci_dev { __u16 adv_instance_timeout; struct delayed_work adv_instance_expire; + struct idr adv_monitors_idr; + unsigned int adv_monitors_cnt; + __u8 irk[16]; __u32 rpa_timeout; struct delayed_work rpa_expired; @@ -1258,6 +1280,8 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); +void hci_adv_monitors_clear(struct hci_dev *hdev); + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); void hci_init_sysfs(struct hci_dev *hdev); -- cgit v1.2.3 From b139553db5cd940d66095fb97de1727e9a19369f Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:14 +0200 Subject: Bluetooth: Add handler of MGMT_OP_ADD_ADV_PATTERNS_MONITOR This adds the request handler of MGMT_OP_ADD_ADV_PATTERNS_MONITOR command. Note that the controller-based monitoring is not yet in place. This tracks the content of the monitor without sending HCI traffic, so the request returns immediately. The following manual test was performed. - Issue btmgmt advmon-add with valid and invalid inputs. - Issue btmgmt advmon-add more the allowed number of monitors. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4e9d51087674..13fad419ae7d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1281,6 +1281,8 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_adv_monitors_clear(struct hci_dev *hdev); +void hci_free_adv_monitor(struct adv_monitor *monitor); +int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -- cgit v1.2.3 From bd2fbc6cb815b5171facb42526f6db206d920e13 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:15 +0200 Subject: Bluetooth: Add handler of MGMT_OP_REMOVE_ADV_MONITOR This adds the request handler of MGMT_OP_REMOVE_ADV_MONITOR command. Note that the controller-based monitoring is not yet in place. This removes the internal monitor(s) without sending HCI traffic, so the request returns immediately. The following test was performed. - Issue btmgmt advmon-remove with valid and invalid handles. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 13fad419ae7d..c54f9295892e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1283,6 +1283,7 @@ void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct adv_monitor *monitor); int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); +int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -- cgit v1.2.3 From 8208f5a9d435e58ee7f53a24d9ccbe7787944537 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:18 +0200 Subject: Bluetooth: Update background scan and report device based on advertisement monitors This calls hci_update_background_scan() when there is any update on the advertisement monitors. If there is at least one advertisement monitor, the filtering policy of scan parameters should be 0x00. This also reports device found mgmt events if there is at least one monitor. The following cases were tested with btmgmt advmon-* commands. (1) add a ADV monitor and observe that the passive scanning is triggered. (2) remove the last ADV monitor and observe that the passive scanning is terminated. (3) with a LE peripheral paired, repeat (1) and observe the passive scanning continues. (4) with a LE peripheral paired, repeat (2) and observe the passive scanning continues. (5) with a ADV monitor, suspend/resume the host and observe the passive scanning continues. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c54f9295892e..524057598ffd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1284,6 +1284,7 @@ void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct adv_monitor *monitor); int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle); +bool hci_is_adv_monitoring(struct hci_dev *hdev); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -- cgit v1.2.3 From 76b139965575e51224d33ea721d9d00a542b6b39 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Wed, 17 Jun 2020 16:39:19 +0200 Subject: Bluetooth: Terminate the link if pairing is cancelled If user decides to cancel the ongoing pairing process (e.g. by clicking the cancel button on pairing/passkey window), abort any ongoing pairing and then terminate the link if it was created because of the pair device action. Signed-off-by: Manish Mandlik Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 524057598ffd..77d29341b064 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -564,6 +564,12 @@ struct hci_dev { #define HCI_PHY_HANDLE(handle) (handle & 0xff) +enum conn_reasons { + CONN_REASON_PAIR_DEVICE, + CONN_REASON_L2CAP_CHAN, + CONN_REASON_SCO_CONNECT, +}; + struct hci_conn { struct list_head list; @@ -615,6 +621,8 @@ struct hci_conn { __s8 max_tx_power; unsigned long flags; + enum conn_reasons conn_reason; + __u32 clock; __u16 clock_accuracy; @@ -1040,12 +1048,14 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout); + u16 conn_timeout, + enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role, bdaddr_t *direct_rpa); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, - u8 sec_level, u8 auth_type); + u8 sec_level, u8 auth_type, + enum conn_reasons conn_reason); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting); int hci_conn_check_link_mode(struct hci_conn *conn); -- cgit v1.2.3 From 9c77b803f263573b6019e4828825709845c37d45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:22 -0700 Subject: net: tso: double TSO_HEADER_SIZE value Transport header size could be 60 bytes, and network header size can also be 60 bytes. Add the Ethernet header and we are above 128 bytes. Since drivers using net/core/tso.c usually allocates one DMA coherent piece of memory per TX queue, this patch might cause issues if a driver was using too many slots. For 1024 slots, we would need 256 KB of physically contiguous memory instead of 128 KB. Alternative fix would be to add checks in the fast path, but this involves more work in all drivers using net/core/tso.c. Fixes: f9cbe9a556af ("net: define the TSO header size in net/tso.h") Signed-off-by: Eric Dumazet Cc: Antoine Tenart Signed-off-by: David S. Miller --- include/net/tso.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index 7e166a570349..c33dd00c161f 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -4,7 +4,7 @@ #include -#define TSO_HEADER_SIZE 128 +#define TSO_HEADER_SIZE 256 struct tso_t { int next_frag_idx; -- cgit v1.2.3 From 185c3e5860227065dcb6ee884b45e0debe4762dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:23 -0700 Subject: net: tso: shrink struct tso_t size field can be an int, no need for size_t Removes a 32bit hole on 64bit kernels. And align fields for better readability. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tso.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index c33dd00c161f..d9b0a14b2a57 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -7,12 +7,12 @@ #define TSO_HEADER_SIZE 256 struct tso_t { - int next_frag_idx; - void *data; - size_t size; - u16 ip_id; - bool ipv6; - u32 tcp_seq; + int next_frag_idx; + int size; + void *data; + u16 ip_id; + bool ipv6; + u32 tcp_seq; }; int tso_count_descs(struct sk_buff *skb); -- cgit v1.2.3 From 504b912150983a8b2499bbf9e4501336677404c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:24 -0700 Subject: net: tso: constify tso_count_descs() and friends skb argument of tso_count_descs(), tso_build_hdr() and tso_build_data() can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tso.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index d9b0a14b2a57..32d9272ade6a 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -15,10 +15,10 @@ struct tso_t { u32 tcp_seq; }; -int tso_count_descs(struct sk_buff *skb); -void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, +int tso_count_descs(const struct sk_buff *skb); +void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last); -void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size); +void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size); void tso_start(struct sk_buff *skb, struct tso_t *tso); #endif /* _TSO_H */ -- cgit v1.2.3 From 761b331cb6902dc0a08f786e9fa0dbd572059027 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:25 -0700 Subject: net: tso: cache transport header length Add tlen field into struct tso_t, and change tso_start() to return skb_transport_offset(skb) + tso->tlen This removes from callers the need to use tcp_hdrlen(skb) and will ease UDP segmentation offload addition. v2: calls tso_start() earlier in otx2_sq_append_tso() [Jakub] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tso.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index 32d9272ade6a..62c98a9c60f1 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -11,6 +11,7 @@ struct tso_t { int size; void *data; u16 ip_id; + u8 tlen; /* transport header len */ bool ipv6; u32 tcp_seq; }; @@ -19,6 +20,6 @@ int tso_count_descs(const struct sk_buff *skb); void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last); void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size); -void tso_start(struct sk_buff *skb, struct tso_t *tso); +int tso_start(struct sk_buff *skb, struct tso_t *tso); #endif /* _TSO_H */ -- cgit v1.2.3 From 4b61d3e8d3daebbde7ec02d593f84248fdf8bec2 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Fri, 19 Jun 2020 14:01:07 +0800 Subject: net: qos offload add flow status with dropped count This patch adds a drop frames counter to tc flower offloading. Reporting h/w dropped frames is necessary for some actions. Some actions like police action and the coming introduced stream gate action would produce dropped frames which is necessary for user. Status update shows how many filtered packets increasing and how many dropped in those packets. v2: Changes - Update commit comments suggest by Jiri Pirko. Signed-off-by: Po Liu Reviewed-by: Simon Horman Reviewed-by: Vlad Buslov Signed-off-by: David S. Miller --- include/net/act_api.h | 11 ++++++----- include/net/flow_offload.h | 5 ++++- include/net/pkt_cls.h | 5 +++-- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8c3934880670..cb382a89ea58 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -106,7 +106,7 @@ struct tc_action_ops { struct netlink_callback *, int, const struct tc_action_ops *, struct netlink_ext_ack *); - void (*stats_update)(struct tc_action *, u64, u32, u64, bool); + void (*stats_update)(struct tc_action *, u64, u64, u64, u64, bool); size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a, tc_action_priv_destructor *destructor); @@ -232,8 +232,8 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) spin_unlock(&a->tcfa_lock); } -void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets, - bool drop, bool hw); +void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, @@ -244,13 +244,14 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, - u64 packets, u64 lastuse, bool hw) + u64 packets, u64 drops, + u64 lastuse, bool hw) { #ifdef CONFIG_NET_CLS_ACT if (!a->ops->stats_update) return; - a->ops->stats_update(a, bytes, packets, lastuse, hw); + a->ops->stats_update(a, bytes, packets, drops, lastuse, hw); #endif } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f2c8311a0433..00c15f14c434 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -389,17 +389,20 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule, struct flow_stats { u64 pkts; u64 bytes; + u64 drops; u64 lastused; enum flow_action_hw_stats used_hw_stats; bool used_hw_stats_valid; }; static inline void flow_stats_update(struct flow_stats *flow_stats, - u64 bytes, u64 pkts, u64 lastused, + u64 bytes, u64 pkts, + u64 drops, u64 lastused, enum flow_action_hw_stats used_hw_stats) { flow_stats->pkts += pkts; flow_stats->bytes += bytes; + flow_stats->drops += drops; flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused); /* The driver should pass value with a maximum of one bit set. diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ed65619cbc47..ff017e5b3ea2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -262,7 +262,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) static inline void tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 lastuse, + u64 bytes, u64 packets, u64 drops, u64 lastuse, u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT @@ -273,7 +273,8 @@ tcf_exts_stats_update(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, lastuse, true); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); a->used_hw_stats = used_hw_stats; a->used_hw_stats_valid = used_hw_stats_valid; } -- cgit v1.2.3 From cc7a21b6fbd945f8d8f61422ccd27203c1fafeb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:02:59 -0700 Subject: ipv6: icmp6: avoid indirect call for icmpv6_send() If IPv6 is builtin, we do not need an expensive indirect call to reach icmp6_send(). v2: put inline keyword before the type to avoid sparse warnings. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 33d379602314..1b3371ae8193 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -13,12 +13,32 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #include #if IS_ENABLED(CONFIG_IPV6) -extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); +#if IS_BUILTIN(CONFIG_IPV6) +void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr); +static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ + icmp6_send(skb, type, code, info, NULL); +} +static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) +{ + BUILD_BUG_ON(fn != icmp6_send); + return 0; +} +static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) +{ + BUILD_BUG_ON(fn != icmp6_send); + return 0; +} +#else +extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); +#endif + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); -- cgit v1.2.3 From c746053d275c8b6ff1d713addabf049c9c9a58fc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Jun 2020 14:45:58 +0100 Subject: net: phy: add support for probing MMDs >= 8 for devices-in-package Add support for probing MMDs above 7 for a valid devices-in-package specifier, but only probe the vendor MMDs for this if they also report that there the device is present in status register 2. This avoids issues where the MMD is implemented, but does not provide IEEE 802.3 compliant registers (such as the MV88X3310 PHY.) Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8c05d0fb5c00..abe318387331 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -388,6 +388,8 @@ enum phy_state { PHY_CABLETEST, }; +#define MDIO_MMD_NUM 32 + /** * struct phy_c45_device_ids - 802.3-c45 Device Identifiers * @devices_in_package: Bit vector of devices present. -- cgit v1.2.3 From 320ed3bf900075614c43499dc01db8d25717b986 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Jun 2020 14:46:08 +0100 Subject: net: phy: split devices_in_package We have two competing requirements for the devices_in_package field. We want to use it as a bit array indicating which MMDs are present, but we also want to know if the Clause 22 registers are present. Since "devices in package" is a term used in the 802.3 specification, keep this as the as-specified values read from the PHY, and introduce a new member "mmds_present" to indicate which MMDs are actually present in the PHY, derived from the "devices in package" value. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index abe318387331..19d9e040ad84 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -392,11 +392,13 @@ enum phy_state { /** * struct phy_c45_device_ids - 802.3-c45 Device Identifiers - * @devices_in_package: Bit vector of devices present. + * @devices_in_package: IEEE 802.3 devices in package register value. + * @mmds_present: bit vector of MMDs present. * @device_ids: The device identifer for each present device. */ struct phy_c45_device_ids { u32 devices_in_package; + u32 mmds_present; u32 device_ids[8]; }; -- cgit v1.2.3 From 389a338999875b6e7b111096e8b6484434556449 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Jun 2020 14:46:13 +0100 Subject: net: phy: read MMD ID from all present MMDs Expand the device_ids[] array to allow all MMD IDs to be read rather than just the first 8 MMDs, but only read the ID if the MDIO_STAT2 register reports that a device really is present here for these new devices to maintain compatibility with our current behaviour. Note that only a limited number of devices have MDIO_STAT2. 88X3310 PHY vendor MMDs do are marked as present in the devices_in_package, but do not contain IEE 802.3 compatible register sets in their lower space. This avoids reading incorrect values as MMD identifiers. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 19d9e040ad84..9248dd2ce4ca 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -399,7 +399,7 @@ enum phy_state { struct phy_c45_device_ids { u32 devices_in_package; u32 mmds_present; - u32 device_ids[8]; + u32 device_ids[MDIO_MMD_NUM]; }; struct macsec_context; -- cgit v1.2.3 From 49042c220b3a31e25902b36df71b23dc10efa0b8 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 20 Jun 2020 00:54:43 +0200 Subject: l3mdev: add infrastructure for table to VRF mapping Add infrastructure to l3mdev (the core code for Layer 3 master devices) in order to find out the corresponding VRF device for a given table id. Therefore, the l3mdev implementations: - can register a callback that returns the device index of the l3mdev associated with a given table id; - can offer the lookup function (table to VRF device). Signed-off-by: Andrea Mayer Signed-off-by: David S. Miller --- include/net/l3mdev.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e942372b077b..031c661aa14d 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -10,6 +10,16 @@ #include #include +enum l3mdev_type { + L3MDEV_TYPE_UNSPEC, + L3MDEV_TYPE_VRF, + __L3MDEV_TYPE_MAX +}; + +#define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1) + +typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_d); + /** * struct l3mdev_ops - l3mdev operations * @@ -37,6 +47,15 @@ struct l3mdev_ops { #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + lookup_by_table_id_t fn); + +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + lookup_by_table_id_t fn); + +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, + u32 table_id); + int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); @@ -280,6 +299,26 @@ struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) return skb; } +static inline +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ + return -EOPNOTSUPP; +} + +static inline +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ +} + +static inline +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, + u32 table_id) +{ + return -ENODEV; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) -- cgit v1.2.3 From aae4e500e106d2ce48d5bdb21210e36efc7460cb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 20 Jun 2020 18:43:46 +0300 Subject: net: mscc: ocelot: generalize the "ACE/ACL" names Access Control Lists (and their respective Access Control Entries) are specifically entries in the VCAP IS2, the security enforcement block, according to the documentation. Let's rename the structures and functions to something more generic, so that VCAP IS1 structures (which would otherwise have to be called Ingress Classification Entries) can reuse the same code without confusion. Some renaming that was done: struct ocelot_ace_rule -> struct ocelot_vcap_filter struct ocelot_acl_block -> struct ocelot_vcap_block enum ocelot_ace_type -> enum ocelot_vcap_key_type struct ocelot_ace_vlan -> struct ocelot_vcap_key_vlan enum ocelot_ace_action -> enum ocelot_vcap_action struct ocelot_ace_stats -> struct ocelot_vcap_stats enum ocelot_ace_type -> enum ocelot_vcap_key_type struct ocelot_ace_frame_* -> struct ocelot_vcap_key_* No functional change is intended. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4953e9994df3..fa2c3904049e 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -470,7 +470,7 @@ struct ocelot_ops { int (*reset)(struct ocelot *ocelot); }; -struct ocelot_acl_block { +struct ocelot_vcap_block { struct list_head rules; int count; int pol_lpr; @@ -535,7 +535,7 @@ struct ocelot { struct list_head multicast; - struct ocelot_acl_block acl_block; + struct ocelot_vcap_block block; const struct vcap_field *vcap_is2_keys; const struct vcap_field *vcap_is2_actions; -- cgit v1.2.3 From 78e57f152c001eed0321ba4413a07c9e33e753e6 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 18 Jun 2020 14:22:15 -0700 Subject: net: Avoid overwriting valid skb->napi_id This will be useful to allow busy poll for tunneled traffic. In case of busy poll for sessions over tunnels, the underlying physical device's queues need to be polled. Tunnels schedule NAPI either via netif_rx() for backlog queue or schedule the gro_cell_poll(). netif_rx() propagates the valid skb->napi_id to the socket. OTOH, gro_cell_poll() stamps the skb->napi_id again by calling skb_mark_napi_id() with the tunnel NAPI which is not a busy poll candidate. This was preventing tunneled traffic to use busy poll. A valid NAPI ID in the skb indicates it was already marked for busy poll by a NAPI driver and hence needs to be copied into the socket. Signed-off-by: Amritha Nambiar Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 86e028388bad..b001fa91c14e 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -114,7 +114,11 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) { #ifdef CONFIG_NET_RX_BUSY_POLL - skb->napi_id = napi->napi_id; + /* If the skb was already marked with a valid NAPI ID, avoid overwriting + * it. + */ + if (skb->napi_id < MIN_NAPI_ID) + skb->napi_id = napi->napi_id; #endif } -- cgit v1.2.3 From 05e22e8395058745bd0312bc488b522197852aff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:12:34 -0700 Subject: tcp: remove indirect calls for icsk->icsk_af_ops->queue_xmit Mitigate RETPOLINE costs in __tcp_transmit_skb() by using INDIRECT_CALL_INET() wrapper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 6 +----- include/net/tcp.h | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 04ebe7bf54c6..862c9545833a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -231,11 +231,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct ipcm_cookie *ipc, struct rtable **rtp, struct inet_cork *cork, unsigned int flags); -static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, - struct flowi *fl) -{ - return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos); -} +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 4de9485f73d9..e5d7e0b09924 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From dd2e0b86fc4ee146ac8f3275833d0187efeb950a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:12:35 -0700 Subject: tcp: remove indirect calls for icsk->icsk_af_ops->send_check Mitigate RETPOLINE costs in __tcp_transmit_skb() by using INDIRECT_CALL_INET() wrapper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_checksum.h | 9 --------- include/net/tcp.h | 3 +++ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 27ec612cd4a4..b3f4eaa88672 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -85,15 +85,6 @@ static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0); } -#if IS_ENABLED(CONFIG_IPV6) -static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); -} -#endif - static inline __sum16 udp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, diff --git a/include/net/tcp.h b/include/net/tcp.h index e5d7e0b09924..cd9cc348dbf9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -932,6 +932,9 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) #endif return 0; } + +INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); + #endif static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) -- cgit v1.2.3 From 5cbd3ebde859bd43bd0584c146060638b1a3abb4 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 22 Jun 2020 13:30:28 +0000 Subject: Bluetooth: use configured params for ext adv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the extended advertisement feature is enabled, a hardcoded min and max interval of 0x8000 is used. This patch fixes this issue by using the configured min/max value. This was validated by setting min/max in main.conf and making sure the right setting is applied: < HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25 #93 [hci0] 10.953011 … Min advertising interval: 181.250 msec (0x0122) Max advertising interval: 181.250 msec (0x0122) … Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Daniel Winkler Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 16ab6ce87883..1f18f71363e9 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2516,4 +2516,12 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) #define hci_iso_data_len(h) ((h) & 0x3fff) #define hci_iso_data_flags(h) ((h) >> 14) +/* le24 support */ +static inline void hci_cpu_to_le24(__u32 val, __u8 dst[3]) +{ + dst[0] = val & 0xff; + dst[1] = (val & 0xff00) >> 8; + dst[2] = (val & 0xff0000) >> 16; +} + #endif /* __HCI_H */ -- cgit v1.2.3 From 8746f135bb01872ff412d408ea1aa9ebd328c1f5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 20 May 2020 14:20:14 -0700 Subject: Bluetooth: Disconnect if E0 is used for Level 4 E0 is not allowed with Level 4: BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C page 1319: '128-bit equivalent strength for link and encryption keys required using FIPS approved algorithms (E0 not allowed, SAFER+ not allowed, and P-192 not allowed; encryption key not shortened' SC enabled: > HCI Event: Read Remote Extended Features (0x23) plen 13 Status: Success (0x00) Handle: 256 Page: 1/2 Features: 0x0b 0x00 0x00 0x00 0x00 0x00 0x00 0x00 Secure Simple Pairing (Host Support) LE Supported (Host) Secure Connections (Host Support) > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 256 Encryption: Enabled with AES-CCM (0x02) SC disabled: > HCI Event: Read Remote Extended Features (0x23) plen 13 Status: Success (0x00) Handle: 256 Page: 1/2 Features: 0x03 0x00 0x00 0x00 0x00 0x00 0x00 0x00 Secure Simple Pairing (Host Support) LE Supported (Host) > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 256 Encryption: Enabled with E0 (0x01) [May 8 20:23] Bluetooth: hci0: Invalid security: expect AES but E0 was used < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 256 Reason: Authentication Failure (0x05) Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 77d29341b064..836dc997ff94 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1481,11 +1481,13 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) else encrypt = 0x01; - if (conn->sec_level == BT_SECURITY_SDP) - conn->sec_level = BT_SECURITY_LOW; + if (!status) { + if (conn->sec_level == BT_SECURITY_SDP) + conn->sec_level = BT_SECURITY_LOW; - if (conn->pending_sec_level > conn->sec_level) - conn->sec_level = conn->pending_sec_level; + if (conn->pending_sec_level > conn->sec_level) + conn->sec_level = conn->pending_sec_level; + } mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { -- cgit v1.2.3 From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:43 -0700 Subject: bpf: Support access to bpf map fields There are multiple use-cases when it's convenient to have access to bpf map fields, both `struct bpf_map` and map type specific struct-s such as `struct bpf_array`, `struct bpf_htab`, etc. For example while working with sock arrays it can be necessary to calculate the key based on map->max_entries (some_hash % max_entries). Currently this is solved by communicating max_entries via "out-of-band" channel, e.g. via additional map with known key to get info about target map. That works, but is not very convenient and error-prone while working with many maps. In other cases necessary data is dynamic (i.e. unknown at loading time) and it's impossible to get it at all. For example while working with a hash table it can be convenient to know how much capacity is already used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case). At the same time kernel knows this info and can provide it to bpf program. Fill this gap by adding support to access bpf map fields from bpf program for both `struct bpf_map` and map type specific fields. Support is implemented via btf_struct_access() so that a user can define their own `struct bpf_map` or map type specific struct in their program with only necessary fields and preserve_access_index attribute, cast a map to this struct and use a field. For example: struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; struct bpf_map *map = (struct bpf_map *)&m_array; /* .. use map->max_entries or array->map.max_entries .. */ } Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of corresponding struct. Only reading from map fields is supported. For btf_struct_access() to work there should be a way to know btf id of a struct that corresponds to a map type. To get btf id there should be a way to get a stringified name of map-specific struct, such as "bpf_array", "bpf_htab", etc for a map type. Two new fields are added to `struct bpf_map_ops` to handle it: * .map_btf_name keeps a btf name of a struct returned by map_alloc(); * .map_btf_id is used to cache btf id of that struct. To make btf ids calculation cheaper they're calculated once while preparing btf_vmlinux and cached same way as it's done for btf_id field of `struct bpf_func_proto` While calculating btf ids, struct names are NOT checked for collision. Collisions will be checked as a part of the work to prepare btf ids used in verifier in compile time that should land soon. The only known collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c) was fixed earlier. Both new fields .map_btf_name and .map_btf_id must be set for a map type for the feature to work. If neither is set for a map type, verifier will return ENOTSUPP on a try to access map_ptr of corresponding type. If just one of them set, it's verifier misconfiguration. Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be supported separately. The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by perfmon_capable() so that unpriv programs won't have access to bpf map fields. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com --- include/linux/bpf.h | 9 +++++++++ include/linux/bpf_verifier.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..1e1501ee53ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -92,6 +92,10 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + + /* BTF name and id of struct allocated by map_alloc */ + const char * const map_btf_name; + int *map_btf_id; }; struct bpf_map_memory { @@ -1109,6 +1113,11 @@ static inline bool bpf_allow_ptr_leaks(void) return perfmon_capable(); } +static inline bool bpf_allow_ptr_to_map_access(void) +{ + return perfmon_capable(); +} + static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ca08db4ffb5f..53c7bd568c5d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -379,6 +379,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; -- cgit v1.2.3 From 2a916ecc405686c1d86f632281bc06aa75ebae4e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:48 +0000 Subject: net/devlink: Support querying hardware address of port function PCI PF and VF devlink port can manage the function represented by a devlink port. Enable users to query port function's hardware address. Example of a PCI VF port which supports a port function: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:11:22:33:44:66 $ devlink port show pci/0000:06:00.0/2 -jp { "port": { "pci/0000:06:00.0/2": { "type": "eth", "netdev": "enp6s0pf0vf1", "flavour": "pcivf", "pfnum": 0, "vfnum": 1, "function": { "hw_addr": "00:11:22:33:44:66" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 12 ++++++++++++ include/uapi/linux/devlink.h | 10 ++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 1df6dfec26c2..56fc9cdb189d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1107,6 +1107,18 @@ struct devlink_ops { int (*trap_policer_counter_get)(struct devlink *devlink, const struct devlink_trap_policer *policer, u64 *p_drops); + /** + * @port_function_hw_addr_get: Port function's hardware address get function. + * + * Should be used by device drivers to report the hardware address of a function managed + * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port + * function handling for a particular port. + * + * Note: @extack can be NULL when port notifier queries the port function. + */ + int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 08563e6a424d..07d0af8f5923 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -451,6 +451,8 @@ enum devlink_attr { DEVLINK_ATTR_TRAP_POLICER_RATE, /* u64 */ DEVLINK_ATTR_TRAP_POLICER_BURST, /* u64 */ + DEVLINK_ATTR_PORT_FUNCTION, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, @@ -497,4 +499,12 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_function_attr { + DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + + __DEVLINK_PORT_FUNCTION_ATTR_MAX, + DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From a1e8ae907c8d67f57432190bb742802a76516b00 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:49 +0000 Subject: net/devlink: Support setting hardware address of port function PCI PF and VF devlink port can manage the function represented by a devlink port. Allow users to set port function's hardware address. Example of a PCI VF port which supports a port function: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55 $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:11:22:33:44:55 Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 56fc9cdb189d..7007f93585a5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1119,6 +1119,16 @@ struct devlink_ops { int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); + /** + * @port_function_hw_addr_set: Port function's hardware address set function. + * + * Should be used by device drivers to set the hardware address of a function managed + * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port + * function handling for a particular port. + */ + int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) -- cgit v1.2.3 From fa997825ebeca820f4001a9e6d285345d3a535ba Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:50 +0000 Subject: net/mlx5: Constify mac address pointer Since none of the functions need to modify the input mac address, constify them. Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 8170da1e9f70..4db87bcfce7b 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -75,7 +75,7 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline); int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, - u16 vport, u8 *addr); + u16 vport, const u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, -- cgit v1.2.3 From 272c2330adc9c68284cb0066719160c24bfe605f Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 Jun 2020 10:31:52 -0400 Subject: xfrm: bail early on slave pass over skb This is prep work for initial support of bonding hardware encryption pass-through support. The bonding driver will fill in the slave_dev pointer, and we use that to know not to skb_push() again on a given skb that was already processed on the bond device. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org CC: intel-wired-lan@lists.osuosl.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 094fe682f5d7..e20b2b27ec48 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -127,6 +127,7 @@ struct xfrm_state_walk { struct xfrm_state_offload { struct net_device *dev; + struct net_device *slave_dev; unsigned long offload_handle; unsigned int num_exthdrs; u8 flags; -- cgit v1.2.3 From 18cb261afd7bf50134e5ccacc5ec91ea16efadd4 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 Jun 2020 10:31:55 -0400 Subject: bonding: support hardware encryption offload to slaves Currently, this support is limited to active-backup mode, as I'm not sure about the feasilibity of mapping an xfrm_state's offload handle to multiple hardware devices simultaneously, and we rely on being able to pass some hints to both the xfrm and NIC driver about whether or not they're operating on a slave device. I've tested this atop an Intel x520 device (ixgbe) using libreswan in transport mode, succesfully achieving ~4.3Gbps throughput with netperf (more or less identical to throughput on a bare NIC in this system), as well as successful failover and recovery mid-netperf. v2: just use CONFIG_XFRM_OFFLOAD for wrapping, isolate more code with it CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org CC: intel-wired-lan@lists.osuosl.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/net/bonding.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index aa854a9c01e2..a00e1764e9b1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -238,6 +238,9 @@ struct bonding { struct dentry *debug_dir; #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; +#ifdef CONFIG_XFRM_OFFLOAD + struct xfrm_state *xs; +#endif /* CONFIG_XFRM_OFFLOAD */ }; #define bond_slave_get_rcu(dev) \ -- cgit v1.2.3 From b5872cd0e823e4cb50b3a75cd9522167eeb676a2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 20 Jun 2020 22:01:56 +0530 Subject: devlink: Add support for board.serial_number to info_get cb. Board serial number is a serial number, often available in PCI *Vital Product Data*. Also, update devlink-info.rst documentation file. Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Vasundhara Volam Reviewed-by: Michael Chan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 7007f93585a5..428f55f8197c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1284,6 +1284,8 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn); int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name); +int devlink_info_board_serial_number_put(struct devlink_info_req *req, + const char *bsn); int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 07d0af8f5923..87c83a82991b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -453,6 +453,8 @@ enum devlink_attr { DEVLINK_ATTR_PORT_FUNCTION, /* nested */ + DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a602ea86e9f0d82f5c7ba1d3f7487d4097380b96 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Sun, 21 Jun 2020 10:59:51 +0300 Subject: net: phy: marvell: Add Marvell 88E1340S support Add support for this new phy ID. Signed-off-by: Maxim Kochetkov Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index af6b11d4d673..c4390e9cbf15 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -15,6 +15,7 @@ #define MARVELL_PHY_ID_88E1149R 0x01410e50 #define MARVELL_PHY_ID_88E1240 0x01410e30 #define MARVELL_PHY_ID_88E1318S 0x01410e90 +#define MARVELL_PHY_ID_88E1340S 0x01410dc0 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 #define MARVELL_PHY_ID_88E1540 0x01410eb0 -- cgit v1.2.3 From f59babf95ef969a18744082ee16e4dfd17743c0b Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Sun, 21 Jun 2020 10:59:52 +0300 Subject: net: phy: marvell: Add Marvell 88E1548P support Add support for this new phy ID. Signed-off-by: Maxim Kochetkov Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index c4390e9cbf15..ff7b7607c8cf 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -20,6 +20,7 @@ #define MARVELL_PHY_ID_88E1510 0x01410dd0 #define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E1545 0x01410ea0 +#define MARVELL_PHY_ID_88E1548P 0x01410ec0 #define MARVELL_PHY_ID_88E3016 0x01410e60 #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 -- cgit v1.2.3 From 209edf95da63a0ad19750769f473f4ea1553d21d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 21 Jun 2020 14:46:01 +0300 Subject: net: dsa: felix: call port mdb operations from ocelot This adds the mdb hooks in felix and exports the mdb functions from ocelot. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index fa2c3904049e..80415b63ccfa 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -641,5 +641,9 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); int ocelot_cls_flower_stats(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); +int ocelot_port_mdb_add(struct ocelot *ocelot, int port, + const struct switchdev_obj_port_mdb *mdb); +int ocelot_port_mdb_del(struct ocelot *ocelot, int port, + const struct switchdev_obj_port_mdb *mdb); #endif -- cgit v1.2.3 From 96b029b004942ecdb50e40d3e45cdc8a3aec9135 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 21 Jun 2020 14:46:02 +0300 Subject: net: mscc: ocelot: introduce macros for iterating over PGIDs The current iterators are impossible to understand at first glance without switching back and forth between the definitions and their actual use in the for loops. So introduce some convenience names to help readability. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 80415b63ccfa..e050f8121ba2 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -65,6 +65,21 @@ #define PGID_MCIPV4 62 #define PGID_MCIPV6 63 +#define for_each_unicast_dest_pgid(ocelot, pgid) \ + for ((pgid) = 0; \ + (pgid) < (ocelot)->num_phys_ports; \ + (pgid)++) + +#define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) \ + for ((pgid) = (ocelot)->num_phys_ports + 1; \ + (pgid) < PGID_CPU; \ + (pgid)++) + +#define for_each_aggr_pgid(ocelot, pgid) \ + for ((pgid) = PGID_AGGR; \ + (pgid) < PGID_SRC; \ + (pgid)++) + /* Aggregation PGIDs, one per Link Aggregation Code */ #define PGID_AGGR 64 -- cgit v1.2.3 From d63cc24933c774ea464090af1998a7b63f11c166 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 8 Apr 2020 12:42:09 +0300 Subject: net/mlx5: Export resource dump interface Export some of the resource dump API. mlx5_ib driver will use it in downstream patches. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/rsc_dump.h | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/linux/mlx5/rsc_dump.h (limited to 'include') diff --git a/include/linux/mlx5/rsc_dump.h b/include/linux/mlx5/rsc_dump.h new file mode 100644 index 000000000000..87415fa754fe --- /dev/null +++ b/include/linux/mlx5/rsc_dump.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies inc. */ + +#include + +#ifndef __MLX5_RSC_DUMP +#define __MLX5_RSC_DUMP + +enum mlx5_sgmt_type { + MLX5_SGMT_TYPE_HW_CQPC, + MLX5_SGMT_TYPE_HW_SQPC, + MLX5_SGMT_TYPE_HW_RQPC, + MLX5_SGMT_TYPE_FULL_SRQC, + MLX5_SGMT_TYPE_FULL_CQC, + MLX5_SGMT_TYPE_FULL_EQC, + MLX5_SGMT_TYPE_FULL_QPC, + MLX5_SGMT_TYPE_SND_BUFF, + MLX5_SGMT_TYPE_RCV_BUFF, + MLX5_SGMT_TYPE_SRQ_BUFF, + MLX5_SGMT_TYPE_CQ_BUFF, + MLX5_SGMT_TYPE_EQ_BUFF, + MLX5_SGMT_TYPE_SX_SLICE, + MLX5_SGMT_TYPE_SX_SLICE_ALL, + MLX5_SGMT_TYPE_RDB, + MLX5_SGMT_TYPE_RX_SLICE_ALL, + MLX5_SGMT_TYPE_MENU, + MLX5_SGMT_TYPE_TERMINATE, + + MLX5_SGMT_TYPE_NUM, /* Keep last */ +}; + +struct mlx5_rsc_key { + enum mlx5_sgmt_type rsc; + int index1; + int index2; + int num_of_obj1; + int num_of_obj2; + int size; +}; + +struct mlx5_rsc_dump_cmd; + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key); +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd); +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size); +#endif /* __MLX5_RSC_DUMP */ -- cgit v1.2.3 From 608ca553c9a2008908120e0e45b1cfc4aefcfd49 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 8 Apr 2020 12:36:20 +0300 Subject: net/mlx5: Add support in query QP, CQ and MKEY segments Introduce new resource dump segments - PRM_QUERY_QP, PRM_QUERY_CQ and PRM_QUERY_MKEY. These segments contains the resource dump in PRM query format. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/rsc_dump.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/rsc_dump.h b/include/linux/mlx5/rsc_dump.h index 87415fa754fe..d11c0b228620 100644 --- a/include/linux/mlx5/rsc_dump.h +++ b/include/linux/mlx5/rsc_dump.h @@ -23,6 +23,9 @@ enum mlx5_sgmt_type { MLX5_SGMT_TYPE_SX_SLICE_ALL, MLX5_SGMT_TYPE_RDB, MLX5_SGMT_TYPE_RX_SLICE_ALL, + MLX5_SGMT_TYPE_PRM_QUERY_QP, + MLX5_SGMT_TYPE_PRM_QUERY_CQ, + MLX5_SGMT_TYPE_PRM_QUERY_MKEY, MLX5_SGMT_TYPE_MENU, MLX5_SGMT_TYPE_TERMINATE, -- cgit v1.2.3 From 79a28ddd18e9c653f13f60dfabee15c024e64b9b Mon Sep 17 00:00:00 2001 From: Alexandre Cassen Date: Tue, 23 Jun 2020 10:33:45 +0200 Subject: rtnetlink: add keepalived rtm_protocol Keepalived can set global static ip routes or virtual ip routes dynamically following VRRP protocol states. Using a dedicated rtm_protocol will help keeping track of it. Changes in v2: - fix tab/space indenting Signed-off-by: Alexandre Cassen Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 45 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 073e71ef6bdd..879e64950a0a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -257,12 +257,12 @@ enum { /* rtm_protocol */ -#define RTPROT_UNSPEC 0 -#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; - not used by current IPv4 */ -#define RTPROT_KERNEL 2 /* Route installed by kernel */ -#define RTPROT_BOOT 3 /* Route installed during boot */ -#define RTPROT_STATIC 4 /* Route installed by administrator */ +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ /* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; they are just passed from user and back as is. @@ -271,22 +271,23 @@ enum { avoid conflicts. */ -#define RTPROT_GATED 8 /* Apparently, GateD */ -#define RTPROT_RA 9 /* RDISC/ND router advertisements */ -#define RTPROT_MRT 10 /* Merit MRT */ -#define RTPROT_ZEBRA 11 /* Zebra */ -#define RTPROT_BIRD 12 /* BIRD */ -#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ -#define RTPROT_XORP 14 /* XORP */ -#define RTPROT_NTK 15 /* Netsukuku */ -#define RTPROT_DHCP 16 /* DHCP client */ -#define RTPROT_MROUTED 17 /* Multicast daemon */ -#define RTPROT_BABEL 42 /* Babel daemon */ -#define RTPROT_BGP 186 /* BGP Routes */ -#define RTPROT_ISIS 187 /* ISIS Routes */ -#define RTPROT_OSPF 188 /* OSPF Routes */ -#define RTPROT_RIP 189 /* RIP Routes */ -#define RTPROT_EIGRP 192 /* EIGRP Routes */ +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ /* rtm_scope -- cgit v1.2.3 From bdb7b79b4ce864a724250e1d35948c46f135de36 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 22 Jun 2020 20:22:21 -0700 Subject: bpf: Switch most helper return values from 32-bit int to 64-bit long Switch most of BPF helper definitions from returning int to long. These definitions are coming from comments in BPF UAPI header and are used to generate bpf_helper_defs.h (under libbpf) to be later included and used from BPF programs. In actual in-kernel implementation, all the helpers are defined as returning u64, but due to some historical reasons, most of them are actually defined as returning int in UAPI (usually, to return 0 on success, and negative value on error). This actually causes Clang to quite often generate sub-optimal code, because compiler believes that return value is 32-bit, and in a lot of cases has to be up-converted (usually with a pair of 32-bit bit shifts) to 64-bit values, before they can be used further in BPF code. Besides just "polluting" the code, these 32-bit shifts quite often cause problems for cases in which return value matters. This is especially the case for the family of bpf_probe_read_str() functions. There are few other similar helpers (e.g., bpf_read_branch_records()), in which return value is used by BPF program logic to record variable-length data and process it. For such cases, BPF program logic carefully manages offsets within some array or map to read variable-length data. For such uses, it's crucial for BPF verifier to track possible range of register values to prove that all the accesses happen within given memory bounds. Those extraneous zero-extending bit shifts, inserted by Clang (and quite often interleaved with other code, which makes the issues even more challenging and sometimes requires employing extra per-variable compiler barriers), throws off verifier logic and makes it mark registers as having unknown variable offset. We'll study this pattern a bit later below. Another common pattern is to check return of BPF helper for non-zero state to detect error conditions and attempt alternative actions in such case. Even in this simple and straightforward case, this 32-bit vs BPF's native 64-bit mode quite often leads to sub-optimal and unnecessary extra code. We'll look at this pattern as well. Clang's BPF target supports two modes of code generation: ALU32, in which it is capable of using lower 32-bit parts of registers, and no-ALU32, in which only full 64-bit registers are being used. ALU32 mode somewhat mitigates the above described problems, but not in all cases. This patch switches all the cases in which BPF helpers return 0 or negative error from returning int to returning long. It is shown below that such change in definition leads to equivalent or better code. No-ALU32 mode benefits more, but ALU32 mode doesn't degrade or still gets improved code generation. Another class of cases switched from int to long are bpf_probe_read_str()-like helpers, which encode successful case as non-negative values, while still returning negative value for errors. In all of such cases, correctness is preserved due to two's complement encoding of negative values and the fact that all helpers return values with 32-bit absolute value. Two's complement ensures that for negative values higher 32 bits are all ones and when truncated, leave valid negative 32-bit value with the same value. Non-negative values have upper 32 bits set to zero and similarly preserve value when high 32 bits are truncated. This means that just casting to int/u32 is correct and efficient (and in ALU32 mode doesn't require any extra shifts). To minimize the chances of regressions, two code patterns were investigated, as mentioned above. For both patterns, BPF assembly was analyzed in ALU32/NO-ALU32 compiler modes, both with current 32-bit int return type and new 64-bit long return type. Case 1. Variable-length data reading and concatenation. This is quite ubiquitous pattern in tracing/monitoring applications, reading data like process's environment variables, file path, etc. In such case, many pieces of string-like variable-length data are read into a single big buffer, and at the end of the process, only a part of array containing actual data is sent to user-space for further processing. This case is tested in test_varlen.c selftest (in the next patch). Code flow is roughly as follows: void *payload = &sample->payload; u64 len; len = bpf_probe_read_kernel_str(payload, MAX_SZ1, &source_data1); if (len <= MAX_SZ1) { payload += len; sample->len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_SZ2, &source_data2); if (len <= MAX_SZ2) { payload += len; sample->len2 = len; } /* and so on */ sample->total_len = payload - &sample->payload; /* send over, e.g., perf buffer */ There could be two variations with slightly different code generated: when len is 64-bit integer and when it is 32-bit integer. Both variations were analysed. BPF assembly instructions between two successive invocations of bpf_probe_read_kernel_str() were used to check code regressions. Results are below, followed by short analysis. Left side is using helpers with int return type, the right one is after the switch to long. ALU32 + INT ALU32 + LONG =========== ============ 64-BIT (13 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +9 18: if r0 > 256 goto +6 19: w1 = w0 19: r1 = 0 ll 20: r1 <<= 32 21: *(u64 *)(r1 + 0) = r0 21: r1 s>>= 32 22: r6 = 0 ll 22: r2 = 0 ll 24: r6 += r0 24: *(u64 *)(r2 + 0) = r1 00000000000000c8 : 25: r6 = 0 ll 25: r1 = r6 27: r6 += r1 26: w2 = 256 00000000000000e0 : 27: r3 = 0 ll 28: r1 = r6 29: call 115 29: w2 = 256 30: r3 = 0 ll 32: call 115 32-BIT (11 insns): 32-BIT (12 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +7 18: if w0 > 256 goto +8 19: r1 = 0 ll 19: r1 = 0 ll 21: *(u32 *)(r1 + 0) = r0 21: *(u32 *)(r1 + 0) = r0 22: w1 = w0 22: r0 <<= 32 23: r6 = 0 ll 23: r0 >>= 32 25: r6 += r1 24: r6 = 0 ll 00000000000000d0 : 26: r6 += r0 26: r1 = r6 00000000000000d8 : 27: w2 = 256 27: r1 = r6 28: r3 = 0 ll 28: w2 = 256 30: call 115 29: r3 = 0 ll 31: call 115 In ALU32 mode, the variant using 64-bit length variable clearly wins and avoids unnecessary zero-extension bit shifts. In practice, this is even more important and good, because BPF code won't need to do extra checks to "prove" that payload/len are within good bounds. 32-bit len is one instruction longer. Clang decided to do 64-to-32 casting with two bit shifts, instead of equivalent `w1 = w0` assignment. The former uses extra register. The latter might potentially lose some range information, but not for 32-bit value. So in this case, verifier infers that r0 is [0, 256] after check at 18:, and shifting 32 bits left/right keeps that range intact. We should probably look into Clang's logic and see why it chooses bitshifts over sub-register assignments for this. NO-ALU32 + INT NO-ALU32 + LONG ============== =============== 64-BIT (14 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r0 <<= 32 18: if r0 > 256 goto +6 19: r1 = r0 19: r1 = 0 ll 20: r1 >>= 32 21: *(u64 *)(r1 + 0) = r0 21: if r1 > 256 goto +7 22: r6 = 0 ll 22: r0 s>>= 32 24: r6 += r0 23: r1 = 0 ll 00000000000000c8 : 25: *(u64 *)(r1 + 0) = r0 25: r1 = r6 26: r6 = 0 ll 26: r2 = 256 28: r6 += r0 27: r3 = 0 ll 00000000000000e8 : 29: call 115 29: r1 = r6 30: r2 = 256 31: r3 = 0 ll 33: call 115 32-BIT (13 insns): 32-BIT (13 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r1 = r0 18: r1 = r0 19: r1 <<= 32 19: r1 <<= 32 20: r1 >>= 32 20: r1 >>= 32 21: if r1 > 256 goto +6 21: if r1 > 256 goto +6 22: r2 = 0 ll 22: r2 = 0 ll 24: *(u32 *)(r2 + 0) = r0 24: *(u32 *)(r2 + 0) = r0 25: r6 = 0 ll 25: r6 = 0 ll 27: r6 += r1 27: r6 += r1 00000000000000e0 : 00000000000000e0 : 28: r1 = r6 28: r1 = r6 29: r2 = 256 29: r2 = 256 30: r3 = 0 ll 30: r3 = 0 ll 32: call 115 32: call 115 In NO-ALU32 mode, for the case of 64-bit len variable, Clang generates much superior code, as expected, eliminating unnecessary bit shifts. For 32-bit len, code is identical. So overall, only ALU-32 32-bit len case is more-or-less equivalent and the difference stems from internal Clang decision, rather than compiler lacking enough information about types. Case 2. Let's look at the simpler case of checking return result of BPF helper for errors. The code is very simple: long bla; if (bpf_probe_read_kenerl(&bla, sizeof(bla), 0)) return 1; else return 0; ALU32 + CHECK (9 insns) ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: w2 = 8 2: w2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: w1 = w0 5: r1 = r0 6: w0 = 1 6: w0 = 1 7: if w1 != 0 goto +1 7: if r1 != 0 goto +1 8: w0 = 0 8: w0 = 0 0000000000000048 : 0000000000000048 : 9: exit 9: exit Almost identical code, the only difference is the use of full register assignment (r1 = r0) vs half-registers (w1 = w0) in instruction #5. On 32-bit architectures, new BPF assembly might be slightly less optimal, in theory. But one can argue that's not a big issue, given that use of full registers is still prevalent (e.g., for parameter passing). NO-ALU32 + CHECK (11 insns) NO-ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: r2 = 8 2: r2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: r1 = r0 5: r1 = r0 6: r1 <<= 32 6: r0 = 1 7: r1 >>= 32 7: if r1 != 0 goto +1 8: r0 = 1 8: r0 = 0 9: if r1 != 0 goto +1 0000000000000048 : 10: r0 = 0 9: exit 0000000000000058 : 11: exit NO-ALU32 is a clear improvement, getting rid of unnecessary zero-extension bit shifts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200623032224.4020118-1-andriin@fb.com --- include/uapi/linux/bpf.h | 192 +++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 19684813faae..be0efee49093 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1630,7 +1630,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1676,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1697,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1708,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1727,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1756,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1806,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1842,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1867,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1911,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1925,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1959,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1977,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2008,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2026,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2040,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2056,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2089,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2111,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2142,7 +2142,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2161,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2175,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2189,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2226,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2241,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2257,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2286,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2305,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2370,7 @@ union bpf_attr { * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2471,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2479,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2489,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2517,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2529,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2543,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2591,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2614,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2651,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2666,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2682,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2701,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2718,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2735,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2759,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2810,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2818,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2859,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2883,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2941,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2949,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2976,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2995,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3007,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3062,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3097,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3126,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3221,7 +3221,7 @@ union bpf_attr { * Return * Requested value, or 0, if flags are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform -- cgit v1.2.3 From e678e9ddea96590888b034e2a7ad1128bf1ea1ea Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 23 Jun 2020 09:42:31 -0700 Subject: indirect_call_wrapper: extend indirect wrapper to support up to 4 calls There are many places where 2 annotations are not enough. This patch adds INDIRECT_CALL_3 and INDIRECT_CALL_4 to cover such cases. Signed-off-by: Brian Vazquez Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/indirect_call_wrapper.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 00d7e8e919c6..54c02c84906a 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -23,6 +23,16 @@ likely(f == f2) ? f2(__VA_ARGS__) : \ INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ }) +#define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ + ({ \ + likely(f == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + }) +#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ + ({ \ + likely(f == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + }) #define INDIRECT_CALLABLE_DECLARE(f) f #define INDIRECT_CALLABLE_SCOPE @@ -30,6 +40,8 @@ #else #define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALL_2(f, f2, f1, ...) f(__VA_ARGS__) +#define INDIRECT_CALL_3(f, f3, f2, f1, ...) f(__VA_ARGS__) +#define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALLABLE_DECLARE(f) #define INDIRECT_CALLABLE_SCOPE static #endif -- cgit v1.2.3 From 55cced4f813bece01f96256d96f283b9210d19ee Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 23 Jun 2020 09:42:32 -0700 Subject: ipv6: fib6: avoid indirect calls from fib6_rule_lookup It was reported that a considerable amount of cycles were spent on the expensive indirect calls on fib6_rule_lookup. This patch introduces an inline helper called pol_route_func that uses the indirect_call_wrappers to avoid the indirect calls. This patch saves around 50ns per call. Performance was measured on the receiver by checking the amount of syncookies that server was able to generate under a synflood load. Traffic was generated using trafgen[1] which was pushing around 1Mpps on a single queue. Receiver was using only one rx queue which help to create a bottle neck and make the experiment rx-bounded. These are the syncookies generated over 10s from the different runs: Whithout the patch: TcpExtSyncookiesSent 3553749 0.0 TcpExtSyncookiesSent 3550895 0.0 TcpExtSyncookiesSent 3553845 0.0 TcpExtSyncookiesSent 3541050 0.0 TcpExtSyncookiesSent 3539921 0.0 TcpExtSyncookiesSent 3557659 0.0 TcpExtSyncookiesSent 3526812 0.0 TcpExtSyncookiesSent 3536121 0.0 TcpExtSyncookiesSent 3529963 0.0 TcpExtSyncookiesSent 3536319 0.0 With the patch: TcpExtSyncookiesSent 3611786 0.0 TcpExtSyncookiesSent 3596682 0.0 TcpExtSyncookiesSent 3606878 0.0 TcpExtSyncookiesSent 3599564 0.0 TcpExtSyncookiesSent 3601304 0.0 TcpExtSyncookiesSent 3609249 0.0 TcpExtSyncookiesSent 3617437 0.0 TcpExtSyncookiesSent 3608765 0.0 TcpExtSyncookiesSent 3620205 0.0 TcpExtSyncookiesSent 3601895 0.0 Without the patch the average is 354263 pkt/s or 2822 ns/pkt and with the patch the average is 360738 pkt/s or 2772 ns/pkt which gives an estimate of 50 ns per packet. [1] http://netsniff-ng.org/ Changelog since v1: - Change ordering in the ICW (Paolo Abeni) Cc: Luigi Rizzo Cc: Paolo Abeni Reported-by: Eric Dumazet Signed-off-by: Brian Vazquez Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3f615a29766e..cc8356fd927f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -552,6 +553,41 @@ struct bpf_iter__ipv6_route { }; #endif +INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags)); +INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags)); +INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags)); +INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags)); +static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup, + struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) +{ + return INDIRECT_CALL_4(lookup, + ip6_pol_route_output, + ip6_pol_route_input, + ip6_pol_route_lookup, + __ip6_route_redirect, + net, table, fl6, skb, flags); +} + #ifdef CONFIG_IPV6_MULTIPLE_TABLES static inline bool fib6_has_custom_rules(const struct net *net) { -- cgit v1.2.3 From bdfd2d1fa79acd03e18d1683419572f3682b39fd Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 23 Jun 2020 16:40:01 -0400 Subject: bonding/xfrm: use real_dev instead of slave_dev Rather than requiring every hw crypto capable NIC driver to do a check for slave_dev being set, set real_dev in the xfrm layer and xso init time, and then override it in the bonding driver as needed. Then NIC drivers can always use real_dev, and at the same time, we eliminate the use of a variable name that probably shouldn't have been used in the first place, particularly given recent current events. CC: Boris Pismenny CC: Saeed Mahameed CC: Leon Romanovsky CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org Suggested-by: Saeed Mahameed Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e20b2b27ec48..e648c9e6c919 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -127,7 +127,7 @@ struct xfrm_state_walk { struct xfrm_state_offload { struct net_device *dev; - struct net_device *slave_dev; + struct net_device *real_dev; unsigned long offload_handle; unsigned int num_exthdrs; u8 flags; -- cgit v1.2.3 From 243600ee660531d2b5b5ef3faab90c5f8ff4c2b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:11 -0700 Subject: tcp: add declarations to avoid warnings Remove these errors: net/ipv6/tcp_ipv6.c:1550:29: warning: symbol 'tcp_v6_rcv' was not declared. Should it be static? net/ipv6/tcp_ipv6.c:1770:30: warning: symbol 'tcp_v6_early_demux' was not declared. Should it be static? net/ipv6/tcp_ipv6.c:1550:29: warning: no previous prototype for 'tcp_v6_rcv' [-Wmissing-prototypes] 1550 | INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) | ^~~~~~~~~~ net/ipv6/tcp_ipv6.c:1770:30: warning: no previous prototype for 'tcp_v6_early_demux' [-Wmissing-prototypes] 1770 | INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) | ^~~~~~~~~~~~~~~~~~ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index cd9cc348dbf9..a8c36fa886a4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -934,6 +934,8 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) } INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); #endif -- cgit v1.2.3 From b03d2142bea8cf7407a0a668ce8f5f115bd226c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:12 -0700 Subject: tcp: move ipv6_specific declaration to remove a warning ipv6_specific should be declared in tcp include files, not mptcp. This removes the following warning : CHECK net/ipv6/tcp_ipv6.c net/ipv6/tcp_ipv6.c:78:42: warning: symbol 'ipv6_specific' was not declared. Should it be static? Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8c36fa886a4..e6920ae0765c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -933,6 +933,8 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) return 0; } +extern const struct inet_connection_sock_af_ops ipv6_specific; + INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); -- cgit v1.2.3 From 9b9e2f250e3e6f59ad07e6d03838c27a100e0042 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:13 -0700 Subject: tcp: move ipv4_specific to tcp include file Declare ipv4_specific once, in tcp.h were it belongs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ include/net/transp_v6.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e6920ae0765c..b0f0f93c681c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -907,6 +907,8 @@ static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb) TCP_SKB_CB(skb)->bpf.sk_redir = NULL; } +extern const struct inet_connection_sock_af_ops ipv4_specific; + #if IS_ENABLED(CONFIG_IPV6) /* This is the variant of inet6_iif() that must be used by TCP, * as TCP moves IP6CB into a different location in skb->cb[] diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index a8f6020f1196..da06613c9603 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -56,9 +56,6 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006) -/* address family specific functions */ -extern const struct inet_connection_sock_af_ops ipv4_specific; - void inet6_destroy_sock(struct sock *sk); #define IPV6_SEQ_DGRAM_HEADER \ -- cgit v1.2.3 From 5521d95e076238f1615cf1cdb135f318ef798b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:14 -0700 Subject: net: move tcp gro declarations to net/tcp.h This patch removes following (C=1 W=1) warnings for CONFIG_RETPOLINE=y : net/ipv4/tcp_offload.c:306:16: warning: symbol 'tcp4_gro_receive' was not declared. Should it be static? net/ipv4/tcp_offload.c:306:17: warning: no previous prototype for 'tcp4_gro_receive' [-Wmissing-prototypes] net/ipv4/tcp_offload.c:319:29: warning: symbol 'tcp4_gro_complete' was not declared. Should it be static? net/ipv4/tcp_offload.c:319:29: warning: no previous prototype for 'tcp4_gro_complete' [-Wmissing-prototypes] CHECK net/ipv6/tcpv6_offload.c net/ipv6/tcpv6_offload.c:16:16: warning: symbol 'tcp6_gro_receive' was not declared. Should it be static? net/ipv6/tcpv6_offload.c:29:29: warning: symbol 'tcp6_gro_complete' was not declared. Should it be static? CC net/ipv6/tcpv6_offload.o net/ipv6/tcpv6_offload.c:16:17: warning: no previous prototype for 'tcp6_gro_receive' [-Wmissing-prototypes] 16 | struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv6/tcpv6_offload.c:29:29: warning: no previous prototype for 'tcp6_gro_complete' [-Wmissing-prototypes] 29 | INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) | ^~~~~~~~~~~~~~~~~ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b0f0f93c681c..27f848ab3995 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1957,6 +1957,10 @@ void tcp_v4_destroy_sock(struct sock *sk); struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb); +INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); int tcp_gro_complete(struct sk_buff *skb); void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); -- cgit v1.2.3 From 6db693285cd109e566c553694002dea769612b24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:15 -0700 Subject: udp: move gro declarations to net/udp.h This removes following warnings : CC net/ipv4/udp_offload.o net/ipv4/udp_offload.c:504:17: warning: no previous prototype for 'udp4_gro_receive' [-Wmissing-prototypes] 504 | struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv4/udp_offload.c:584:29: warning: no previous prototype for 'udp4_gro_complete' [-Wmissing-prototypes] 584 | INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) | ^~~~~~~~~~~~~~~~~ CHECK net/ipv6/udp_offload.c net/ipv6/udp_offload.c:115:16: warning: symbol 'udp6_gro_receive' was not declared. Should it be static? net/ipv6/udp_offload.c:148:29: warning: symbol 'udp6_gro_complete' was not declared. Should it be static? CC net/ipv6/udp_offload.o net/ipv6/udp_offload.c:115:17: warning: no previous prototype for 'udp6_gro_receive' [-Wmissing-prototypes] 115 | struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv6/udp_offload.c:148:29: warning: no previous prototype for 'udp6_gro_complete' [-Wmissing-prototypes] 148 | INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) | ^~~~~~~~~~~~~~~~~ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index a8fa6c0c6ded..5a2d677432f0 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -27,6 +27,7 @@ #include #include #include +#include /** * struct udp_skb_cb - UDP(-Lite) private variables @@ -166,6 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, __be16 dport); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); -- cgit v1.2.3 From 6f3934576853a4fa60dea74ac8822f0f016ef9e8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Jun 2020 18:07:41 -0500 Subject: net: ipv6: Use struct_size() helper and kcalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. Also, remove unnecessary function ipv6_rpl_srh_alloc_size() and replace kzalloc() with kcalloc(), which has a 2-factor argument form for multiplication. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/net/rpl.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/rpl.h b/include/net/rpl.h index dceff60e8baf..308ef0a05cae 100644 --- a/include/net/rpl.h +++ b/include/net/rpl.h @@ -26,12 +26,6 @@ static inline void rpl_exit(void) {} /* Worst decompression memory usage ipv6 address (16) + pad 7 */ #define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7) -static inline size_t ipv6_rpl_srh_alloc_size(unsigned char n) -{ - return sizeof(struct ipv6_rpl_sr_hdr) + - ((n + 1) * sizeof(struct in6_addr)); -} - size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, unsigned char cmpre); -- cgit v1.2.3 From 0cc8fecf041d3e5285380da62cc6662bdc942d8c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 22 Jun 2020 20:35:32 +0530 Subject: net: phy: Allow mdio buses to auto-probe c45 devices The mdiobus_scan logic is currently hardcoded to only work with c22 devices. This works fairly well in most cases, but its possible that a c45 device doesn't respond despite being a standard phy. If the parent hardware is capable, it makes sense to scan for c22 devices before falling back to c45. As we want this to reflect the capabilities of the STA, lets add a field to the mii_bus structure to represent the capability. That way devices can opt into the extended scanning. Existing users should continue to default to c22 only scanning as long as they are zero'ing the structure before use. Signed-off-by: Jeremy Linton Signed-off-by: Calvin Johnson Signed-off-by: David S. Miller --- include/linux/phy.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9248dd2ce4ca..7860d56c6bf5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -298,6 +298,14 @@ struct mii_bus { /* RESET GPIO descriptor pointer */ struct gpio_desc *reset_gpiod; + /* bus capabilities, used for probing */ + enum { + MDIOBUS_NO_CAP = 0, + MDIOBUS_C22, + MDIOBUS_C45, + MDIOBUS_C22_C45, + } probe_capabilities; + /* protect access to the shared element */ struct mutex shared_lock; -- cgit v1.2.3 From 428d2459cceb77357b81c242ca22462a6a904817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Sat, 30 May 2020 14:39:12 +0200 Subject: xfrm: introduce oseq-may-wrap flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 4303 in section 3.3.3 suggests to disable anti-replay for manually distributed ICVs in which case the sender does not need to monitor or reset the counter. However, the sender still increments the counter and when it reaches the maximum value, the counter rolls over back to zero. This patch introduces new extra_flag XFRM_SA_XFLAG_OSEQ_MAY_WRAP which allows sequence number to cycle in outbound packets if set. This flag is used only in legacy and bmp code, because esn should not be negotiated if anti-replay is disabled (see note in 3.3.3 section). Signed-off-by: Petr Vaněk Acked-by: Christophe Gouault Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ff7cfdc6cb44..ffc6a5391bb7 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -387,6 +387,7 @@ struct xfrm_usersa_info { }; #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +#define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 struct xfrm_usersa_id { xfrm_address_t daddr; -- cgit v1.2.3 From dfde1d7dee9bfd095a4f16c9e0579a10f4092e81 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:50 +0300 Subject: sock: Move sock_valbool_flag to header This is preparation for usage in bpf_setsockopt. Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-1-zeil@yandex-team.ru --- include/net/sock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c53cc42b5ab9..8ba438b671d7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -879,6 +879,15 @@ static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag) __clear_bit(flag, &sk->sk_flags); } +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, + int valbool) +{ + if (valbool) + sock_set_flag(sk, bit); + else + sock_reset_flag(sk, bit); +} + static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) { return test_bit(flag, &sk->sk_flags); -- cgit v1.2.3 From aad4a0a9513af962137c4842463d11ed491eec37 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:51 +0300 Subject: tcp: Expose tcp_sock_set_keepidle_locked This is preparation for usage in bpf_setsockopt. v2: - remove redundant EXPORT_SYMBOL (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200620153052.9439-2-zeil@yandex-team.ru --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9aac824c523c..3bdec31ce8f4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -499,6 +499,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepcnt(struct sock *sk, int val); +int tcp_sock_set_keepidle_locked(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); -- cgit v1.2.3 From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:52 +0300 Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt This patch adds support of SO_KEEPALIVE flag and TCP related options to bpf_setsockopt() routine. This is helpful if we want to enable or tune TCP keepalive for applications which don't do it in the userspace code. v3: - update kernel-doc in uapi (Nikita Vetoshkin ) v4: - update kernel-doc in tools too (Alexei Starovoitov) - add test to selftests (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9d3923e6b860..d9737d51dd19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1621,10 +1621,13 @@ union bpf_attr { * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return -- cgit v1.2.3 From 0ef44e5cab8dbf0a0327871b48fe7c8425d0d885 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 23 Jun 2020 16:30:07 +0200 Subject: net: phy: add support for a common probe between shared PHYs Shared PHYs (PHYs in the same hardware package) may have shared registers and their drivers would usually need to share information. There is currently a way to have a shared (part of the) init, by using phy_package_init_once(). This patch extends the logic to share parts of the probe to allow sharing the initialization of locks or resources retrieval. Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7860d56c6bf5..6fb8f302978d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -244,7 +244,8 @@ struct phy_package_shared { }; /* used as bit number in atomic bitops */ -#define PHY_SHARED_F_INIT_DONE 0 +#define PHY_SHARED_F_INIT_DONE 0 +#define PHY_SHARED_F_PROBE_DONE 1 /* * The Bus class for PHYs. Devices which provide access to @@ -1566,14 +1567,25 @@ static inline int __phy_package_write(struct phy_device *phydev, return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); } -static inline bool phy_package_init_once(struct phy_device *phydev) +static inline bool __phy_package_set_once(struct phy_device *phydev, + unsigned int b) { struct phy_package_shared *shared = phydev->shared; if (!shared) return false; - return !test_and_set_bit(PHY_SHARED_F_INIT_DONE, &shared->flags); + return !test_and_set_bit(b, &shared->flags); +} + +static inline bool phy_package_init_once(struct phy_device *phydev) +{ + return __phy_package_set_once(phydev, PHY_SHARED_F_INIT_DONE); +} + +static inline bool phy_package_probe_once(struct phy_device *phydev) +{ + return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); } extern struct bus_type mdio_bus_type; -- cgit v1.2.3 From 899426b3bdd947541ba4af8c767575889c8b842a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:16 +0300 Subject: net: neighbor: add fdb extended attribute Add an attribute to NDA which will contain all future fdb-specific attributes in order to avoid polluting the NDA namespace with e.g. bridge or vxlan specific attributes. The attribute is called NDA_FDB_EXT_ATTRS and the structure would look like: [NDA_FDB_EXT_ATTRS] = { [NFEA_xxx] } Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index eefcda8ca44e..540ff48402a1 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -30,6 +30,7 @@ enum { NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ NDA_NH_ID, + NDA_FDB_EXT_ATTRS, __NDA_MAX }; @@ -172,4 +173,15 @@ enum { }; #define NDTA_MAX (__NDTA_MAX - 1) +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * ... + * } + */ +enum { + NFEA_UNSPEC, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + #endif -- cgit v1.2.3 From 31cbc39b6344916c20452e43a9171009214c409c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:17 +0300 Subject: net: bridge: add option to allow activity notifications for any fdb entries This patch adds the ability to notify about activity of any entries (static, permanent or ext_learn). EVPN multihoming peers need it to properly and efficiently handle mac sync (peer active/locally active). We add a new NFEA_ACTIVITY_NOTIFY attribute which is used to dump the current activity state and to control if static entries should be monitored at all. We use 2 bits - one to activate fdb entry tracking (disabled by default) and the second to denote that an entry is inactive. We need the second bit in order to avoid multiple notifications of inactivity. Obviously this makes no difference for dynamic entries since at the time of inactivity they get deleted, while the tracked non-dynamic entries get the inactive bit set and get a notification. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 540ff48402a1..21e569297355 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -173,13 +173,24 @@ enum { }; #define NDTA_MAX (__NDTA_MAX - 1) + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + /* embedded into NDA_FDB_EXT_ATTRS: * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] * ... * } */ enum { NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, __NFEA_MAX }; #define NFEA_MAX (__NFEA_MAX - 1) -- cgit v1.2.3 From b5f1d9ec283bd28a452cf61d7e5c2f2b1a9cccda Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:18 +0300 Subject: net: bridge: add a flag to avoid refreshing fdb when changing/adding When we modify or create a new fdb entry sometimes we want to avoid refreshing its activity in order to track it properly. One example is when a mac is received from EVPN multi-homing peer by FRR, which doesn't want to change local activity accounting. It makes it static and sets a flag to track its activity. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 21e569297355..dc8b72201f6c 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -191,6 +191,7 @@ enum { enum { NFEA_UNSPEC, NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, __NFEA_MAX }; #define NFEA_MAX (__NFEA_MAX - 1) -- cgit v1.2.3 From b08d4d3b6c0460306e8a0608413b201705200d33 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:04 -0700 Subject: net: bpf: Add bpf_seq_afinfo in tcp_iter_state A new field bpf_seq_afinfo is added to tcp_iter_state to provide bpf tcp iterator afinfo. There are two reasons on why we did this. First, the current way to get afinfo from PDE_DATA does not work for bpf iterator as its seq_file inode does not conform to /proc/net/{tcp,tcp6} inode structures. More specifically, anonymous bpf iterator will use an anonymous inode which is shared in the system and we cannot change inode private data structure at all. Second, bpf iterator for tcp/tcp6 wants to traverse all tcp and tcp6 sockets in one pass and bpf program can control whether they want to skip one sk_family or not. Having a different afinfo with family AF_UNSPEC make it easier to understand in the code. This patch does not change /proc/net/{tcp,tcp6} behavior as the bpf_seq_afinfo will be NULL for these two proc files. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230804.3987829-1-yhs@fb.com --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4de9485f73d9..eab1c7d0facb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1935,6 +1935,7 @@ struct tcp_iter_state { struct seq_net_private p; enum tcp_seq_states state; struct sock *syn_wait_sk; + struct tcp_seq_afinfo *bpf_seq_afinfo; int bucket, offset, sbucket, num; loff_t last_pos; }; -- cgit v1.2.3 From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:09 -0700 Subject: bpf: Add bpf_skc_to_tcp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a tcp6_sock pointer. The return value could be NULL if the casting is illegal. A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added so the verifier is able to deduce proper return types for the helper. Different from the previous BTF_ID based helpers, the bpf_skc_to_tcp6_sock() argument can be several possible btf_ids. More specifically, all possible socket data structures with sock_common appearing in the first in the memory layout. This patch only added socket types related to tcp and udp. All possible argument btf_id and return value btf_id for helper bpf_skc_to_tcp6_sock() are pre-calculcated and cached. In the future, it is even possible to precompute these btf_id's at kernel build time. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com --- include/linux/bpf.h | 12 ++++++++++++ include/uapi/linux/bpf.h | 9 ++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e1501ee53ce..c0e38ad07848 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -265,6 +265,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ + RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -287,6 +288,12 @@ struct bpf_func_proto { enum bpf_arg_type arg_type[5]; }; int *btf_id; /* BTF ids of arguments */ + bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is + * valid. Often used if more + * than one btf id is permitted + * for this argument. + */ + int *ret_btf_id; /* return value btf_id */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -1524,6 +1531,7 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map) struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); +void init_btf_sock_ids(struct btf *btf); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) @@ -1549,6 +1557,9 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) static inline void bpf_map_offload_map_free(struct bpf_map *map) { } +static inline void init_btf_sock_ids(struct btf *btf) +{ +} #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) @@ -1638,6 +1649,7 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d9737d51dd19..e90ad07b291a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3255,6 +3255,12 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3392,7 +3398,8 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:11 -0700 Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers Three more helpers are added to cast a sock_common pointer to an tcp_sock, tcp_timewait_sock or a tcp_request_sock for tracing programs. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c0e38ad07848..c23998cf6699 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1650,6 +1650,9 @@ extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e90ad07b291a..b9412ab275f3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3261,6 +3261,24 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3399,7 +3417,10 @@ union bpf_attr { FN(ringbuf_discard), \ FN(ringbuf_query), \ FN(csum_level), \ - FN(skc_to_tcp6_sock), + FN(skc_to_tcp6_sock), \ + FN(skc_to_tcp_sock), \ + FN(skc_to_tcp_timewait_sock), \ + FN(skc_to_tcp_request_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 9e8ca27afab6c92477b459f6a5d2af0cd3197c20 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:12 -0700 Subject: net: bpf: Add bpf_seq_afinfo in udp_iter_state Similar to tcp_iter_state, a new field bpf_seq_afinfo is added to udp_iter_state to provide bpf udp iterator afinfo. This does not change /proc/net/{udp, udp6} behavior. But it enables bpf iterator to avoid get afinfo from PDE_DATA and iterate through all udp and udp6 sockets in one pass. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230812.3988347-1-yhs@fb.com --- include/net/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index a8fa6c0c6ded..67c8b7368845 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -440,6 +440,7 @@ struct udp_seq_afinfo { struct udp_iter_state { struct seq_net_private p; int bucket; + struct udp_seq_afinfo *bpf_seq_afinfo; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); -- cgit v1.2.3 From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:15 -0700 Subject: bpf: Add bpf_skc_to_udp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a udp6_sock pointer. The return value could be NULL if the casting is illegal. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c23998cf6699..3d2ade703a35 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1653,6 +1653,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9412ab275f3..0cb8ec948816 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3279,6 +3279,12 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3420,7 +3426,8 @@ union bpf_attr { FN(skc_to_tcp6_sock), \ FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), + FN(skc_to_tcp_request_sock), \ + FN(skc_to_udp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 19e528dc9af29169fa7cdfa61071805fdef504c6 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Wed, 24 Jun 2020 17:36:28 +0800 Subject: net: qos: add tc police offloading action with max frame size limit Current police offloading support the 'burst'' and 'rate_bytes_ps'. Some hardware own the capability to limit the frame size. If the frame size larger than the setting, the frame would be dropped. For the police action itself already accept the 'mtu' parameter in tc command. But not extend to tc flower offloading. So extend 'mtu' to tc flower offloading. Signed-off-by: Po Liu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 + include/net/tc_act/tc_police.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 00c15f14c434..c2ef19c6b27d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -234,6 +234,7 @@ struct flow_action_entry { struct { /* FLOW_ACTION_POLICE */ s64 burst; u64 rate_bytes_ps; + u32 mtu; } police; struct { /* FLOW_ACTION_CT */ int action; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index f098ad4424be..cd973b10ae8c 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -69,4 +69,14 @@ static inline s64 tcf_police_tcfp_burst(const struct tc_action *act) return params->tcfp_burst; } +static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->tcfp_mtu; +} + #endif /* __NET_TC_POLICE_H */ -- cgit v1.2.3 From 627e39b1399e72e53895eec6bbec30199ed43de2 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Wed, 24 Jun 2020 17:36:30 +0800 Subject: net: qos: police action add index for tc flower offloading Hardware device may include more than one police entry. Specifying the action's index make it possible for several tc filters to share the same police action when installing the filters. Propagate this index to device drivers through the flow offload intermediate representation, so that drivers could share a single hardware policer between multiple filters. v1->v2 changes: - Update the commit message suggest by Ido Schimmel Signed-off-by: Po Liu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index c2ef19c6b27d..eed98075b1ae 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -232,6 +232,7 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ + u32 index; s64 burst; u64 rate_bytes_ps; u32 mtu; -- cgit v1.2.3 From c6d5d843d9b6e8dca3768250970f0d0a1e3d4fb0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 24 Jun 2020 11:06:54 +0100 Subject: net: phylink: add phylink_speed_(up|down) interface Add an interface for the phy_speed_(up|down) functions when a driver makes use of phylink. These pass the call through to phylib when we have a normal PHY attached (i.o.w., not a PHY on a SFP module.) Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index cc5b452a184e..b32b8b45421b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -392,6 +392,8 @@ int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); +int phylink_speed_down(struct phylink *pl, bool sync); +int phylink_speed_up(struct phylink *pl); #define phylink_zero(bm) \ bitmap_zero(bm, __ETHTOOL_LINK_MODE_MASK_NBITS) -- cgit v1.2.3 From 92252eec913b2dd5e7b5de11ea3efa2e64d65cf4 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 24 Jun 2020 07:16:02 -0500 Subject: net: phy: Add a helper to return the index for of the internal delay Add a helper function that will return the index in the array for the passed in internal delay value. The helper requires the array, size and delay value. The helper will then return the index for the exact match or return the index for the index to the closest smaller value. Signed-off-by: Dan Murphy Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6fb8f302978d..2c00374dc996 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1443,6 +1443,10 @@ void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp); void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); + +s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + const int *delay_values, int size, bool is_rx); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); -- cgit v1.2.3 From fe21b6c3a65ca74cc4d0909635649bea48b115b3 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 4 May 2020 09:43:48 -0700 Subject: i40e: Move client header location Move i40e_client.h to include/linux/net/intel/* since its shared between i40iw and i40e. Signed-off-by: Shiraz Saleem Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/net/intel/i40e_client.h | 203 ++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 include/linux/net/intel/i40e_client.h (limited to 'include') diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h new file mode 100644 index 000000000000..72994baf4941 --- /dev/null +++ b/include/linux/net/intel/i40e_client.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +#ifndef _I40E_CLIENT_H_ +#define _I40E_CLIENT_H_ + +#define I40E_CLIENT_STR_LENGTH 10 + +/* Client interface version should be updated anytime there is a change in the + * existing APIs or data structures. + */ +#define I40E_CLIENT_VERSION_MAJOR 0 +#define I40E_CLIENT_VERSION_MINOR 01 +#define I40E_CLIENT_VERSION_BUILD 00 +#define I40E_CLIENT_VERSION_STR \ + __stringify(I40E_CLIENT_VERSION_MAJOR) "." \ + __stringify(I40E_CLIENT_VERSION_MINOR) "." \ + __stringify(I40E_CLIENT_VERSION_BUILD) + +struct i40e_client_version { + u8 major; + u8 minor; + u8 build; + u8 rsvd; +}; + +enum i40e_client_state { + __I40E_CLIENT_NULL, + __I40E_CLIENT_REGISTERED +}; + +enum i40e_client_instance_state { + __I40E_CLIENT_INSTANCE_NONE, + __I40E_CLIENT_INSTANCE_OPENED, +}; + +struct i40e_ops; +struct i40e_client; + +/* HW does not define a type value for AEQ; only for RX/TX and CEQ. + * In order for us to keep the interface simple, SW will define a + * unique type value for AEQ. + */ +#define I40E_QUEUE_TYPE_PE_AEQ 0x80 +#define I40E_QUEUE_INVALID_IDX 0xFFFF + +struct i40e_qv_info { + u32 v_idx; /* msix_vector */ + u16 ceq_idx; + u16 aeq_idx; + u8 itr_idx; +}; + +struct i40e_qvlist_info { + u32 num_vectors; + struct i40e_qv_info qv_info[1]; +}; + +#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF + +/* set of LAN parameters useful for clients managed by LAN */ + +/* Struct to hold per priority info */ +struct i40e_prio_qos_params { + u16 qs_handle; /* qs handle for prio */ + u8 tc; /* TC mapped to prio */ + u8 reserved; +}; + +#define I40E_CLIENT_MAX_USER_PRIORITY 8 +/* Struct to hold Client QoS */ +struct i40e_qos_params { + struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY]; +}; + +struct i40e_params { + struct i40e_qos_params qos; + u16 mtu; +}; + +/* Structure to hold Lan device info for a client device */ +struct i40e_info { + struct i40e_client_version version; + u8 lanmac[6]; + struct net_device *netdev; + struct pci_dev *pcidev; + u8 __iomem *hw_addr; + u8 fid; /* function id, PF id or VF id */ +#define I40E_CLIENT_FTYPE_PF 0 +#define I40E_CLIENT_FTYPE_VF 1 + u8 ftype; /* function type, PF or VF */ + void *pf; + + /* All L2 params that could change during the life span of the PF + * and needs to be communicated to the client when they change + */ + struct i40e_qvlist_info *qvlist_info; + struct i40e_params params; + struct i40e_ops *ops; + + u16 msix_count; /* number of msix vectors*/ + /* Array down below will be dynamically allocated based on msix_count */ + struct msix_entry *msix_entries; + u16 itr_index; /* Which ITR index the PE driver is suppose to use */ + u16 fw_maj_ver; /* firmware major version */ + u16 fw_min_ver; /* firmware minor version */ + u32 fw_build; /* firmware build number */ +}; + +#define I40E_CLIENT_RESET_LEVEL_PF 1 +#define I40E_CLIENT_RESET_LEVEL_CORE 2 +#define I40E_CLIENT_VSI_FLAG_TCP_ENABLE BIT(1) + +struct i40e_ops { + /* setup_q_vector_list enables queues with a particular vector */ + int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client, + struct i40e_qvlist_info *qv_info); + + int (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client, + u32 vf_id, u8 *msg, u16 len); + + /* If the PE Engine is unresponsive, RDMA driver can request a reset. + * The level helps determine the level of reset being requested. + */ + void (*request_reset)(struct i40e_info *ldev, + struct i40e_client *client, u32 level); + + /* API for the RDMA driver to set certain VSI flags that control + * PE Engine. + */ + int (*update_vsi_ctxt)(struct i40e_info *ldev, + struct i40e_client *client, + bool is_vf, u32 vf_id, + u32 flag, u32 valid_flag); +}; + +struct i40e_client_ops { + /* Should be called from register_client() or whenever PF is ready + * to create a specific client instance. + */ + int (*open)(struct i40e_info *ldev, struct i40e_client *client); + + /* Should be called when netdev is unavailable or when unregister + * call comes in. If the close is happenening due to a reset being + * triggered set the reset bit to true. + */ + void (*close)(struct i40e_info *ldev, struct i40e_client *client, + bool reset); + + /* called when some l2 managed parameters changes - mtu */ + void (*l2_param_change)(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_params *params); + + int (*virtchnl_receive)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id, + u8 *msg, u16 len); + + /* called when a VF is reset by the PF */ + void (*vf_reset)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id); + + /* called when the number of VFs changes */ + void (*vf_enable)(struct i40e_info *ldev, + struct i40e_client *client, u32 num_vfs); + + /* returns true if VF is capable of specified offload */ + int (*vf_capable)(struct i40e_info *ldev, + struct i40e_client *client, u32 vf_id); +}; + +/* Client device */ +struct i40e_client_instance { + struct list_head list; + struct i40e_info lan_info; + struct i40e_client *client; + unsigned long state; +}; + +struct i40e_client { + struct list_head list; /* list of registered clients */ + char name[I40E_CLIENT_STR_LENGTH]; + struct i40e_client_version version; + unsigned long state; /* client state */ + atomic_t ref_cnt; /* Count of all the client devices of this kind */ + u32 flags; +#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) +#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) + u8 type; +#define I40E_CLIENT_IWARP 0 + const struct i40e_client_ops *ops; /* client ops provided by the client */ +}; + +static inline bool i40e_client_is_registered(struct i40e_client *client) +{ + return test_bit(__I40E_CLIENT_REGISTERED, &client->state); +} + +/* used by clients */ +int i40e_register_client(struct i40e_client *client); +int i40e_unregister_client(struct i40e_client *client); + +#endif /* _I40E_CLIENT_H_ */ -- cgit v1.2.3 From 3c98f9ee6bc280499cbcb6f8e42c001c3bd7caa1 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 6 Jan 2020 16:09:33 -0800 Subject: i40e: remove unused defines Remove all the unused defines as they are just dead weight. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/net/intel/i40e_client.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index 72994baf4941..f41387a8969f 100644 --- a/include/linux/net/intel/i40e_client.h +++ b/include/linux/net/intel/i40e_client.h @@ -37,11 +37,6 @@ enum i40e_client_instance_state { struct i40e_ops; struct i40e_client; -/* HW does not define a type value for AEQ; only for RX/TX and CEQ. - * In order for us to keep the interface simple, SW will define a - * unique type value for AEQ. - */ -#define I40E_QUEUE_TYPE_PE_AEQ 0x80 #define I40E_QUEUE_INVALID_IDX 0xFFFF struct i40e_qv_info { @@ -56,7 +51,6 @@ struct i40e_qvlist_info { struct i40e_qv_info qv_info[1]; }; -#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF /* set of LAN parameters useful for clients managed by LAN */ @@ -87,7 +81,6 @@ struct i40e_info { u8 __iomem *hw_addr; u8 fid; /* function id, PF id or VF id */ #define I40E_CLIENT_FTYPE_PF 0 -#define I40E_CLIENT_FTYPE_VF 1 u8 ftype; /* function type, PF or VF */ void *pf; @@ -184,8 +177,6 @@ struct i40e_client { unsigned long state; /* client state */ atomic_t ref_cnt; /* Count of all the client devices of this kind */ u32 flags; -#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) -#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) u8 type; #define I40E_CLIENT_IWARP 0 const struct i40e_client_ops *ops; /* client ops provided by the client */ -- cgit v1.2.3 From bccb48c89fe3c09f1cbb7c8612e31f5daa1d4541 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 1 Jun 2020 20:13:21 +0200 Subject: batman-adv: Fix typos and grammar in documentation Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 50 +++++++++++++++++++------------------- include/uapi/linux/batman_adv.h | 4 +-- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 0ae34c85ef9e..9c8604c5b5f6 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -72,8 +72,8 @@ enum batadv_subtype { /** * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets - * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was - * previously received from someone else than the best neighbor. + * @BATADV_NOT_BEST_NEXT_HOP: flag is set when the ogm packet is forwarded and + * was previously received from someone other than the best neighbor. * @BATADV_PRIMARIES_FIRST_HOP: flag unused. * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a * one hop neighbor on the interface where it was originally received. @@ -195,8 +195,8 @@ struct batadv_bla_claim_dst { /** * struct batadv_ogm_packet - ogm (routing protocol) packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @seqno: sequence identification * @orig: address of the source node @@ -247,7 +247,7 @@ struct batadv_ogm2_packet { /** * struct batadv_elp_packet - elp (neighbor discovery) packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header + * @version: batman-adv protocol version, part of the general header * @orig: originator mac address * @seqno: sequence number * @elp_interval: currently used ELP sending interval in ms @@ -265,15 +265,15 @@ struct batadv_elp_packet { /** * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier * @align: not used - useful for alignment purposes only * - * This structure is used for ICMP packets parsing only and it is never sent + * This structure is used for ICMP packet parsing only and it is never sent * over the wire. The alignment field at the end is there to ensure that * members are padded the same way as they are in real packets. */ @@ -291,8 +291,8 @@ struct batadv_icmp_header { /** * struct batadv_icmp_packet - ICMP packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -315,8 +315,8 @@ struct batadv_icmp_packet { /** * struct batadv_icmp_tp_packet - ICMP TP Meter packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -358,8 +358,8 @@ enum batadv_icmp_tp_subtype { /** * struct batadv_icmp_packet_rr - ICMP RouteRecord packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node @@ -397,8 +397,8 @@ struct batadv_icmp_packet_rr { /** * struct batadv_unicast_packet - unicast packet for network payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @ttvn: translation table version number * @dest: originator destination of the unicast packet */ @@ -433,8 +433,8 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -467,8 +467,8 @@ struct batadv_frag_packet { /** * struct batadv_bcast_packet - broadcast packet for network payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @reserved: reserved byte for alignment * @seqno: sequence identification * @orig: originator of the broadcast packet @@ -488,10 +488,10 @@ struct batadv_bcast_packet { /** * struct batadv_coded_packet - network coded packet * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @first_source: original source of first included packet - * @first_orig_dest: original destinal of first included packet + * @first_orig_dest: original destination of first included packet * @first_crc: checksum of first included packet * @first_ttvn: tt-version number of first included packet * @second_ttl: ttl of second packet @@ -523,8 +523,8 @@ struct batadv_coded_packet { /** * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 617c180ff0c8..8cf2ad11ead9 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -69,7 +69,7 @@ enum batadv_tt_client_flags { /** * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be - * part of the network but no nnode has already announced it + * part of the network but no node has already announced it */ BATADV_TT_CLIENT_TEMP = (1 << 11), }; @@ -131,7 +131,7 @@ enum batadv_gw_modes { /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ BATADV_GW_MODE_CLIENT, - /** @BATADV_GW_MODE_SERVER: announce itself as gatway server */ + /** @BATADV_GW_MODE_SERVER: announce itself as gateway server */ BATADV_GW_MODE_SERVER, }; -- cgit v1.2.3 From 3bda14d09dc5789a895ab02b7dcfcec19b0a65b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 1 Jun 2020 22:35:22 +0200 Subject: batman-adv: Introduce a configurable per interface hop penalty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some setups multiple hard interfaces with similar link qualities or throughput values are available. But people have expressed the desire to consider one of them as a backup only. Some creative solutions are currently in use: Such people are configuring multiple batman-adv mesh/soft interfaces, wire them together with some veth pairs and then tune the hop penalty to achieve an effect similar to a tunable per interface hop penalty. This patch introduces a new, configurable, per hard interface hop penalty to simplify such setups. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 8cf2ad11ead9..bb0ae945b36a 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -427,7 +427,8 @@ enum batadv_nl_attrs { /** * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied - * to an originator message's tq-field on every hop. + * to an originator message's tq-field on every hop and/or per + * hard interface */ BATADV_ATTR_HOP_PENALTY, -- cgit v1.2.3 From 333740981f94fa80326cc8e5d2da105f17bc1dd5 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 26 Jun 2020 17:53:23 +0200 Subject: net: mdio: add a forward declaration for reset_control to mdio.h This header refers to struct reset_control but doesn't include any reset header. The structure definition is probably somehow indirectly pulled in since no warnings are reported but for the sake of correctness add the forward declaration for struct reset_control. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 36d2e0673d03..898cbf00332a 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -18,6 +18,7 @@ struct gpio_desc; struct mii_bus; +struct reset_control; /* Multiple levels of nesting are possible. However typically this is * limited to nested DSA like layer, a MUX layer, and the normal -- cgit v1.2.3 From 9205d7b1c1cffa827c23bdbf35e04c7cbe1e1f10 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 25 Jun 2020 22:59:41 -0700 Subject: net/mlx5: Avoid RDMA file inclusion in core driver mlx5 cq.h does not depend on RDMA verbs. Remove RDMA verbs file inclusion. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/cq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index b5a9399e07ee..7bfb67363434 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -33,7 +33,6 @@ #ifndef MLX5_CORE_CQ_H #define MLX5_CORE_CQ_H -#include #include #include -- cgit v1.2.3 From 2d1b69ed65ee033aa541518cc9f6a815296ac493 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 25 Jun 2020 22:59:43 -0700 Subject: net/mlx5: kTLS, Improve TLS params layout structures Add explicit WQE segment structures for the TLS static and progress params. According to the HW spec, TISN is not part of the progress params context, take it out of it. Rename the control segment tisn field as it could hold either a TIS or a TIR number. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 9 +++++++++ include/linux/mlx5/mlx5_ifc.h | 5 +---- include/linux/mlx5/qp.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1bc27aca648b..57db125e5802 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -458,6 +458,15 @@ enum { MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS = 0x2, }; +struct mlx5_wqe_tls_static_params_seg { + u8 ctx[MLX5_ST_SZ_BYTES(tls_static_params)]; +}; + +struct mlx5_wqe_tls_progress_params_seg { + __be32 tis_tir_num; + u8 ctx[MLX5_ST_SZ_BYTES(tls_progress_params)]; +}; + enum { MLX5_SET_PORT_RESET_QKEY = 0, MLX5_SET_PORT_GUID0 = 16, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 116bd9bb347f..a227518c70cf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10638,16 +10638,13 @@ struct mlx5_ifc_tls_static_params_bits { }; struct mlx5_ifc_tls_progress_params_bits { - u8 reserved_at_0[0x8]; - u8 tisn[0x18]; - u8 next_record_tcp_sn[0x20]; u8 hw_resync_tcp_sn[0x20]; u8 record_tracker_state[0x2]; u8 auth_state[0x2]; - u8 reserved_at_64[0x4]; + u8 reserved_at_44[0x4]; u8 hw_offset_record_number[0x18]; }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index b8992b861ae6..36492a1342cf 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -209,7 +209,7 @@ struct mlx5_wqe_ctrl_seg { __be32 general_id; __be32 imm; __be32 umr_mkey; - __be32 tisn; + __be32 tis_tir_num; }; }; -- cgit v1.2.3 From acb5a07aaf2723cd273a4089e62611a414fb1c35 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Mon, 8 Jun 2020 12:42:52 +0300 Subject: Revert "net/tls: Add force_resync for driver resync" This reverts commit b3ae2459f89773adcbf16fef4b68deaaa3be1929. Revert the force resync API. Not in use. To be replaced by a better async resync API downstream. Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- include/net/tls.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 3212d3c214a9..ca5f7f437289 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -607,22 +607,12 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) #endif /* The TLS context is valid until sk_destruct is called */ -#define RESYNC_REQ (1 << 0) -#define RESYNC_REQ_FORCE (1 << 1) static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); -} - -static inline void tls_offload_rx_force_resync_request(struct sock *sk) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_req, RESYNC_REQ | RESYNC_REQ_FORCE); + atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1); } static inline void -- cgit v1.2.3 From ed9b7646b06a2ed2450dd9437fc7d1ad2783140c Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Mon, 8 Jun 2020 19:11:38 +0300 Subject: net/tls: Add asynchronous resync This patch adds support for asynchronous resynchronization in tls_device. Async resync follows two distinct stages: 1. The NIC driver indicates that it would like to resync on some TLS record within the received packet (P), but the driver does not know (yet) which of the TLS records within the packet. At this stage, the NIC driver will query the device to find the exact TCP sequence for resync (tcpsn), however, the driver does not wait for the device to provide the response. 2. Eventually, the device responds, and the driver provides the tcpsn within the resync packet to KTLS. Now, KTLS can check the tcpsn against any processed TLS records within packet P, and also against any record that is processed in the future within packet P. The asynchronous resync path simplifies the device driver, as it can save bits on the packet completion (32-bit TCP sequence), and pass this information on an asynchronous command instead. Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/net/tls.h | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index ca5f7f437289..c875c0a445a6 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -291,11 +291,19 @@ struct tlsdev_ops { enum tls_offload_sync_type { TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1, + TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC = 2, }; #define TLS_DEVICE_RESYNC_NH_START_IVAL 2 #define TLS_DEVICE_RESYNC_NH_MAX_IVAL 128 +#define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13 +struct tls_offload_resync_async { + atomic64_t req; + u32 loglen; + u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; +}; + struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -314,6 +322,10 @@ struct tls_offload_context_rx { u32 decrypted_failed; u32 decrypted_tgt; } resync_nh; + /* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC */ + struct { + struct tls_offload_resync_async *resync_async; + }; }; u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state @@ -606,13 +618,37 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) } #endif +#define RESYNC_REQ BIT(0) +#define RESYNC_REQ_ASYNC BIT(1) /* The TLS context is valid until sk_destruct is called */ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1); + atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); +} + +/* Log all TLS record header TCP sequences in [seq, seq+len] */ +static inline void +tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); + + atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + (len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); + rx_ctx->resync_async->loglen = 0; +} + +static inline void +tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); + + atomic64_set(&rx_ctx->resync_async->req, + ((u64)ntohl(seq) << 32) | RESYNC_REQ); } static inline void -- cgit v1.2.3 From fe80536acf8397827be77f9b8ada384b90e790d0 Mon Sep 17 00:00:00 2001 From: Martin Date: Sun, 28 Jun 2020 23:18:23 +0530 Subject: bareudp: Added attribute to enable & disable rx metadata collection Metadata need not be collected in receive if the packet from bareudp device is not targeted to openvswitch. Signed-off-by: Martin Signed-off-by: David S. Miller --- include/net/bareudp.h | 1 + include/uapi/linux/if_link.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bareudp.h b/include/net/bareudp.h index dc65a0d71d9b..3dd5f9a8d01c 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -12,6 +12,7 @@ struct bareudp_conf { __be16 port; u16 sport_min; bool multi_proto_mode; + bool rx_collect_metadata; }; struct net_device *bareudp_dev_create(struct net *net, const char *name, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a009365ad67b..cc185a007ade 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -600,6 +600,7 @@ enum { IFLA_BAREUDP_ETHERTYPE, IFLA_BAREUDP_SRCPORT_MIN, IFLA_BAREUDP_MULTIPROTO_MODE, + IFLA_BAREUDP_RX_COLLECT_METADATA, __IFLA_BAREUDP_MAX }; -- cgit v1.2.3 From 6fc3e68f5b35c4861b28733fa32f636db7188746 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 28 Jun 2020 17:32:25 +0800 Subject: sctp: use list_is_singular in sctp_list_single_entry Use list_is_singular() instead of open-coding. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index f8bcb75bb044..e3bd198b00ae 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -412,7 +412,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { - return (head->next != head) && (head->next == head->prev); + return list_is_singular(head); } static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) -- cgit v1.2.3 From aebe4426ccaa4838f36ea805cdf7d76503e65117 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:25 +0300 Subject: net: sched: Pass root lock to Qdisc_ops.enqueue A following patch introduces qevents, points in qdisc algorithm where packet can be processed by user-defined filters. Should this processing lead to a situation where a new packet is to be enqueued on the same port, holding the root lock would lead to deadlocks. To solve the issue, qevent handler needs to unlock and relock the root lock when necessary. To that end, add the root lock argument to the qdisc op enqueue, and propagate throughout. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c510b03b9751..fceb3d63c925 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -57,6 +57,7 @@ struct qdisc_skb_head { struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; @@ -241,6 +242,7 @@ struct Qdisc_ops { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); @@ -788,11 +790,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch, to_free); + return sch->enqueue(skb, sch, root_lock, to_free); } static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, -- cgit v1.2.3 From 3625750f05ecce21a0fce429c1ff85acfffb461b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:26 +0300 Subject: net: sched: Introduce helpers for qevent blocks Qevents are attach points for TC blocks, where filters can be put that are executed when "interesting events" take place in a qdisc. The data to keep and the functions to invoke to maintain a qevent will be largely the same between qevents. Therefore introduce sched-wide helpers for qevent management. Currently, similarly to ingress and egress blocks of clsact pseudo-qdisc, blocks attachment cannot be changed after the qdisc is created. To that end, add a helper tcf_qevent_validate_change(), which verifies whether block index attribute is not attached, or if it is, whether its value matches the current one (i.e. there is no material change). The function tcf_qevent_handle() should be invoked when qdisc hits the "interesting event" corresponding to a block. This function releases root lock for the duration of executing the attached filters, to allow packets generated through user actions (notably mirred) to be reinserted to the same qdisc tree. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ff017e5b3ea2..690a7f49c8f9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -32,6 +32,12 @@ struct tcf_block_ext_info { u32 block_index; }; +struct tcf_qevent { + struct tcf_block *block; + struct tcf_block_ext_info info; + struct tcf_proto __rcu *filter_chain; +}; + struct tcf_block_cb; bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); @@ -553,6 +559,49 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, void *cb_priv, u32 *flags, unsigned int *in_hw_count); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); +#ifdef CONFIG_NET_CLS_ACT +int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack); +void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch); +int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack); +struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret); +int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe); +#else +static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) +{ +} + +static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline struct sk_buff * +tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret) +{ + return skb; +} + +static inline int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) +{ + return 0; +} +#endif + struct tc_cls_u32_knode { struct tcf_exts *exts; struct tcf_result *res; -- cgit v1.2.3 From aee9caa03fc3c8b02f8f31824354d85f30e562e0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:28 +0300 Subject: net: sched: sch_red: Add qevents "early_drop" and "mark" In order to allow acting on dropped and/or ECN-marked packets, add two new qevents to the RED qdisc: "early_drop" and "mark". Filters attached at "early_drop" block are executed as packets are early-dropped, those attached at the "mark" block are executed as packets are ECN-marked. Two new attributes are introduced: TCA_RED_EARLY_DROP_BLOCK with the block index for the "early_drop" qevent, and TCA_RED_MARK_BLOCK for the "mark" qevent. Absence of these attributes signifies "don't care": no block is allocated in that case, or the existing blocks are left intact in case of the change callback. For purposes of offloading, blocks attached to these qevents appear with newly-introduced binder types, FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP and FLOW_BLOCK_BINDER_TYPE_RED_MARK. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 ++ include/uapi/linux/pkt_sched.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3bafb5124ac0..3e793ac66baf 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -424,6 +424,8 @@ enum flow_block_binder_type { FLOW_BLOCK_BINDER_TYPE_UNSPEC, FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS, + FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP, + FLOW_BLOCK_BINDER_TYPE_RED_MARK, }; struct flow_block { diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a95f3ae7ab37..9e7c2c607845 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -257,6 +257,8 @@ enum { TCA_RED_STAB, TCA_RED_MAX_P, TCA_RED_FLAGS, /* bitfield32 */ + TCA_RED_EARLY_DROP_BLOCK, /* u32 */ + TCA_RED_MARK_BLOCK, /* u32 */ __TCA_RED_MAX, }; -- cgit v1.2.3 From 5f035af76e51cd622abc6564d5512ffeb9e06917 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Mon, 29 Jun 2020 14:54:16 +0800 Subject: net:qos: police action offloading parameter 'burst' change to the original value Since 'tcfp_burst' with TICK factor, driver side always need to recover it to the original value, this patch moves the generic calculation and recover to the 'burst' original value before offloading to device driver. Signed-off-by: Po Liu Acked-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- include/net/flow_offload.h | 2 +- include/net/tc_act/tc_police.h | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 50389772c597..4046ccd1945d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -144,7 +144,7 @@ struct dsa_mall_mirror_tc_entry { /* TC port policer entry */ struct dsa_mall_policer_tc_entry { - s64 burst; + u32 burst; u64 rate_bytes_per_sec; }; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3e793ac66baf..de395498440d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -233,7 +233,7 @@ struct flow_action_entry { } sample; struct { /* FLOW_ACTION_POLICE */ u32 index; - s64 burst; + u32 burst; u64 rate_bytes_ps; u32 mtu; } police; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index cd973b10ae8c..6d1e26b709b5 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -59,14 +59,42 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act) return params->rate.rate_bytes_ps; } -static inline s64 tcf_police_tcfp_burst(const struct tc_action *act) +static inline u32 tcf_police_burst(const struct tc_action *act) { struct tcf_police *police = to_police(act); struct tcf_police_params *params; + u32 burst; params = rcu_dereference_protected(police->params, lockdep_is_held(&police->tcf_lock)); - return params->tcfp_burst; + + /* + * "rate" bytes "burst" nanoseconds + * ------------ * ------------------- + * 1 second 2^6 ticks + * + * ------------------------------------ + * NSEC_PER_SEC nanoseconds + * ------------------------ + * 2^6 ticks + * + * "rate" bytes "burst" nanoseconds 2^6 ticks + * = ------------ * ------------------- * ------------------------ + * 1 second 2^6 ticks NSEC_PER_SEC nanoseconds + * + * "rate" * "burst" + * = ---------------- bytes/nanosecond + * NSEC_PER_SEC^2 + * + * + * "rate" * "burst" + * = ---------------- bytes/second + * NSEC_PER_SEC + */ + burst = div_u64(params->tcfp_burst * params->rate.rate_bytes_ps, + NSEC_PER_SEC); + + return burst; } static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) -- cgit v1.2.3 From ecc31c60240b9808a274befc5db6b8a249a6ade1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 29 Jun 2020 23:46:16 +0300 Subject: ethtool: Add link extended state Currently, drivers can only tell whether the link is up/down using LINKSTATE_GET, but no additional information is given. Add attributes to LINKSTATE_GET command in order to allow drivers to expose the user more information in addition to link state to ease the debug process, for example, reason for link down state. Extended state consists of two attributes - link_ext_state and link_ext_substate. The idea is to avoid 'vendor specific' states in order to prevent drivers to use specific link_ext_state that can be in the future common link_ext_state. The substates allows drivers to add more information to the common link_ext_state. For example, vendor can expose 'Autoneg' as link_ext_state and add 'No partner detected during force mode' as link_ext_substate. If a driver cannot pinpoint the extended state with the substate accuracy, it is free to expose only the extended state and omit the substate attribute. Signed-off-by: Amit Cohen Reviewed-by: Jiri Pirko Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/ethtool.h | 23 ++++++++++++ include/uapi/linux/ethtool.h | 70 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 2 ++ 3 files changed, 95 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a23b26eab479..48ad3b6a0150 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -86,6 +86,22 @@ struct net_device; u32 ethtool_op_get_link(struct net_device *dev); int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); + +/** + * struct ethtool_link_ext_state_info - link extended state and substate. + */ +struct ethtool_link_ext_state_info { + enum ethtool_link_ext_state link_ext_state; + union { + enum ethtool_link_ext_substate_autoneg autoneg; + enum ethtool_link_ext_substate_link_training link_training; + enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch; + enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; + enum ethtool_link_ext_substate_cable_issue cable_issue; + u8 __link_ext_substate; + }; +}; + /** * ethtool_rxfh_indir_default - get default value for RX flow hash indirection * @index: Index in RX flow hash indirection table @@ -245,6 +261,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * @get_link: Report whether physical link is up. Will only be called if * the netdev is up. Should usually be set to ethtool_op_get_link(), * which uses netif_carrier_ok(). + * @get_link_ext_state: Report link extended state. Should set link_ext_state and + * link_ext_substate (link_ext_substate of 0 means link_ext_substate is unknown, + * do not attach ext_substate attribute to netlink message). If link_ext_state + * and link_ext_substate are unknown, return -ENODATA. If not implemented, + * link_ext_state and link_ext_substate will not be sent to userspace. * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@ -384,6 +405,8 @@ struct ethtool_ops { void (*set_msglevel)(struct net_device *, u32); int (*nway_reset)(struct net_device *); u32 (*get_link)(struct net_device *); + int (*get_link_ext_state)(struct net_device *, + struct ethtool_link_ext_state_info *); int (*get_eeprom_len)(struct net_device *); int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f4662b3a9e1e..d1413538ef30 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -579,6 +579,76 @@ struct ethtool_pauseparam { __u32 tx_pause; }; +/** + * enum ethtool_link_ext_state - link extended state + */ +enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_STATE_NO_CABLE, + ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, +}; + +/** + * enum ethtool_link_ext_substate_autoneg - more information in addition to + * ETHTOOL_LINK_EXT_STATE_AUTONEG. + */ +enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, +}; + +/** + * enum ethtool_link_ext_substate_link_training - more information in addition to + * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + */ +enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, +}; + +/** + * enum ethtool_link_ext_substate_logical_mismatch - more information in addition + * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + */ +enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, +}; + +/** + * enum ethtool_link_ext_substate_bad_signal_integrity - more information in + * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + */ +enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, +}; + +/** + * enum ethtool_link_ext_substate_cable_issue - more information in + * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. + */ +enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, +}; + #define ETH_GSTRING_LEN 32 /** diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 4dda5e4244a7..c12ce4df4b6b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -236,6 +236,8 @@ enum { ETHTOOL_A_LINKSTATE_LINK, /* u8 */ ETHTOOL_A_LINKSTATE_SQI, /* u32 */ ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ + ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, -- cgit v1.2.3 From 857ca89711de3dbcc674d58a6d7d297ee0bd34e1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 21 Jun 2020 18:40:30 +0300 Subject: ipvs: register hooks only with services Keep the IPVS hooks registered in Netfilter only while there are configured virtual services. This saves CPU cycles while IPVS is loaded but not used. Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 83be2d93b407..0c9881241323 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -874,6 +874,7 @@ struct netns_ipvs { struct ip_vs_stats tot_stats; /* Statistics & est. */ int num_services; /* no of virtual services */ + int num_services6; /* IPv6 virtual services */ /* Trash for destinations */ struct list_head dest_trash; @@ -960,6 +961,7 @@ struct netns_ipvs { * are not supported when synchronization is enabled. */ unsigned int mixed_address_family_dests; + unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ }; #define DEFAULT_SYNC_THRESHOLD 3 @@ -1670,6 +1672,9 @@ static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc) #endif } +int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af); +void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af); + static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { -- cgit v1.2.3 From a6ed3ebca49b62d7a917287b9986feff4e9fa7b1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 30 Jun 2020 15:27:46 +0100 Subject: net/tls: fix sign extension issue when left shifting u16 value Left shifting the u16 value promotes it to a int and then it gets sign extended to a u64. If len << 16 is greater than 0x7fffffff then the upper bits get set to 1 because of the implicit sign extension. Fix this by casting len to u64 before shifting it. Addresses-Coverity: ("integer handling issues") Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync") Signed-off-by: Colin Ian King Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index c875c0a445a6..e5dac7e74e79 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -637,7 +637,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | - (len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); + ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); rx_ctx->resync_async->loglen = 0; } -- cgit v1.2.3 From ab81e23cf77905896a5e51422eb58e8763507b85 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 29 Jun 2020 14:05:07 +0300 Subject: net: qed: correct existing SPDX tags QLogic QED drivers source code is dual licensed under GPL-2.0/BSD-3-Clause. Correct already existing but wrong SPDX tags to match the actual license. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/fcoe_common.h | 2 +- include/linux/qed/qed_fcoe_if.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index 98cfc195abe2..a669d7d84284 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015 QLogic Corporation */ diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 46082480a2c3..65d0317ef67e 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -1,4 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + #ifndef _QED_FCOE_IF_H #define _QED_FCOE_IF_H #include -- cgit v1.2.3 From 1f4d4ed6acc5b74974fa038e5e9d2e19f3b532f3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 29 Jun 2020 14:05:08 +0300 Subject: net: qed: convert to SPDX License Identifiers QLogic QED drivers source code is dual licensed under GPL-2.0/BSD-3-Clause. Remove all the boilerplates in the existing code and replace it with the correct SPDX tag. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 29 +---------------------------- include/linux/qed/eth_common.h | 29 +---------------------------- include/linux/qed/iscsi_common.h | 29 +---------------------------- include/linux/qed/iwarp_common.h | 29 +---------------------------- include/linux/qed/qed_chain.h | 29 +---------------------------- include/linux/qed/qed_eth_if.h | 29 +---------------------------- include/linux/qed/qed_if.h | 29 +---------------------------- include/linux/qed/qed_iov_if.h | 29 +---------------------------- include/linux/qed/qed_iscsi_if.h | 29 +---------------------------- include/linux/qed/qed_ll2_if.h | 29 +---------------------------- include/linux/qed/qed_rdma_if.h | 30 ++---------------------------- include/linux/qed/qede_rdma.h | 30 ++---------------------------- include/linux/qed/rdma_common.h | 29 +---------------------------- include/linux/qed/roce_common.h | 29 +---------------------------- include/linux/qed/storage_common.h | 29 +---------------------------- include/linux/qed/tcp_common.h | 29 +---------------------------- 16 files changed, 18 insertions(+), 448 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 2c4737e6694a..294d01eae5cb 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2016 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _COMMON_HSI_H diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 95f5fd615852..a9566ef3c2ce 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __ETH_COMMON__ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 2f0a771a9176..7ca89fb9247f 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __ISCSI_COMMON__ diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h index c6cfd39cd910..23583e644257 100644 --- a/include/linux/qed/iwarp_common.h +++ b/include/linux/qed/iwarp_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __IWARP_COMMON__ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 6d15040c642c..e6e25197f7cb 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_CHAIN_H diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index a1310482c4ed..7803dedbcb52 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_ETH_IF_H diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8cb76405cbce..4a36608ff3a8 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_IF_H diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index ac2e6a3199a3..c2ca8196def9 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_IOV_IF_H diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index d0df1bec5357..89912c6c440c 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_ISCSI_IF_H diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 1313c34d9a68..79cac277e38b 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef _QED_LL2_IF_H diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 2d3ddd2b85e0..041a5b005a82 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -1,34 +1,8 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ + #ifndef _QED_RDMA_IF_H #define _QED_RDMA_IF_H #include diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 5a00c7a473bf..20ed7f2c55bb 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -1,34 +1,8 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qedr NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ + #ifndef QEDE_ROCE_H #define QEDE_ROCE_H diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 480a57eb36cc..3e3f01136c06 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __RDMA_COMMON__ diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index 473fba76aa77..89065f023813 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __ROCE_COMMON__ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 9a973ffbbff5..34f069c79067 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __STORAGE_COMMON__ diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index 4a4845193539..925e7cb7a582 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef __TCP_COMMON__ -- cgit v1.2.3 From 663eacd899ac131dcfc2279184c1ce3c4fd2815f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 29 Jun 2020 14:05:09 +0300 Subject: net: qed: update copyright years Set the actual copyright holder and years in all qed source files. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 1 + include/linux/qed/eth_common.h | 1 + include/linux/qed/fcoe_common.h | 1 + include/linux/qed/iscsi_common.h | 1 + include/linux/qed/iwarp_common.h | 1 + include/linux/qed/qed_chain.h | 1 + include/linux/qed/qed_eth_if.h | 1 + include/linux/qed/qed_fcoe_if.h | 1 + include/linux/qed/qed_if.h | 1 + include/linux/qed/qed_iov_if.h | 1 + include/linux/qed/qed_iscsi_if.h | 1 + include/linux/qed/qed_ll2_if.h | 1 + include/linux/qed/qed_rdma_if.h | 1 + include/linux/qed/qede_rdma.h | 1 + include/linux/qed/rdma_common.h | 1 + include/linux/qed/roce_common.h | 1 + include/linux/qed/storage_common.h | 1 + include/linux/qed/tcp_common.h | 1 + 18 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 294d01eae5cb..977807e1be53 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2016 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _COMMON_HSI_H diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index a9566ef3c2ce..cd1207ad4ada 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __ETH_COMMON__ diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index a669d7d84284..68eda1c21cde 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __FCOE_COMMON__ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 7ca89fb9247f..157019f716f1 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __ISCSI_COMMON__ diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h index 23583e644257..14f9e4c0ddd9 100644 --- a/include/linux/qed/iwarp_common.h +++ b/include/linux/qed/iwarp_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __IWARP_COMMON__ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index e6e25197f7cb..92cdc79e5019 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_CHAIN_H diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 7803dedbcb52..812a4d751163 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_ETH_IF_H diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 65d0317ef67e..16752eca5cbd 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_FCOE_IF_H #define _QED_FCOE_IF_H diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4a36608ff3a8..5ca081cd2ed9 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_IF_H diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index c2ca8196def9..8e31a28e51b9 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_IOV_IF_H diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index 89912c6c440c..04180d9af560 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_ISCSI_IF_H diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 79cac277e38b..2f64ed79cee9 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_LL2_IF_H diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 041a5b005a82..f464d85e88a4 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef _QED_RDMA_IF_H diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 20ed7f2c55bb..072da2f6da37 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qedr NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef QEDE_ROCE_H diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 3e3f01136c06..bab078b25834 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __RDMA_COMMON__ diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index 89065f023813..ccddd7a96b67 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __ROCE_COMMON__ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 34f069c79067..91896e8793bf 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __STORAGE_COMMON__ diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index 925e7cb7a582..2b2c87d10e0a 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2017 QLogic Corporation + * Copyright (c) 2019-2020 Marvell International Ltd. */ #ifndef __TCP_COMMON__ -- cgit v1.2.3 From 8b11c20a658de159fcf18ebce4f2acfdf747ff25 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 29 Jun 2020 14:03:41 +0200 Subject: phy: un-inline devm_mdiobus_register() Functions should only be static inline if they're very short. This devres helper is already over 10 lines and it will grow soon as we'll be improving upon its approach. Pull it into mdio_devres.c. Signed-off-by: Bartosz Golaszewski Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index ecc7640705b9..7c75e1c90141 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -322,20 +322,9 @@ static inline struct mii_bus *mdiobus_alloc(void) } int __mdiobus_register(struct mii_bus *bus, struct module *owner); +int __devm_mdiobus_register(struct mii_bus *bus, struct module *owner); #define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) -static inline int devm_mdiobus_register(struct mii_bus *bus) -{ - int ret; - - if (!bus->is_managed) - return -EPERM; - - ret = mdiobus_register(bus); - if (!ret) - bus->is_managed_registered = 1; - - return ret; -} +#define devm_mdiobus_register(bus) __devm_mdiobus_register(bus, THIS_MODULE) void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); -- cgit v1.2.3 From ac3a68d56651c3dad2c12c7afce065fe15267f44 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 29 Jun 2020 14:03:43 +0200 Subject: net: phy: don't abuse devres in devm_mdiobus_register() We currently have two managed helpers for mdiobus - devm_mdiobus_alloc() and devm_mdiobus_register(). The idea behind devres is that the release callback releases whatever resource the devm function allocates. In the mdiobus case however there's no devres associated with the device by devm_mdiobus_register(). Instead the release callback for devm_mdiobus_alloc(): _devm_mdiobus_free() unregisters the device if it is marked as managed. This all seems wrong. The managed structure shouldn't need to know or care about whether it's managed or not - and this is the case now for struct mii_bus. The devres wrapper should be opaque to the managed resource. This changeset makes devm_mdiobus_alloc() and devm_mdiobus_register() conform to common devres standards: devm_mdiobus_alloc() allocates a devres structure and registers a callback that will call mdiobus_free(). __devm_mdiobus_register() allocated another devres and registers a callback that will unregister the bus. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- include/linux/phy.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7c75e1c90141..101a48fa6750 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -261,9 +261,6 @@ struct mii_bus { int (*reset)(struct mii_bus *bus); struct mdio_bus_stats stats[PHY_MAX_ADDR]; - unsigned int is_managed:1; /* is device-managed */ - unsigned int is_managed_registered:1; - /* * A lock to ensure that only one thing can read/write * the MDIO bus at a time @@ -322,9 +319,11 @@ static inline struct mii_bus *mdiobus_alloc(void) } int __mdiobus_register(struct mii_bus *bus, struct module *owner); -int __devm_mdiobus_register(struct mii_bus *bus, struct module *owner); +int __devm_mdiobus_register(struct device *dev, struct mii_bus *bus, + struct module *owner); #define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) -#define devm_mdiobus_register(bus) __devm_mdiobus_register(bus, THIS_MODULE) +#define devm_mdiobus_register(dev, bus) \ + __devm_mdiobus_register(dev, bus, THIS_MODULE) void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); @@ -335,7 +334,6 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) } struct mii_bus *mdio_find_bus(const char *mdio_name); -void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); #define PHY_INTERRUPT_DISABLED false -- cgit v1.2.3 From a0bd96f5aed2108505386733abe76f37362c2f50 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 29 Jun 2020 14:03:44 +0200 Subject: of: mdio: remove the 'extern' keyword from function declarations The 'extern' keyword in headers doesn't have any benefit. Remove them all from the of_mdio.h header. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 0f61a4ac6bcf..ba8e157f24ad 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -12,27 +12,26 @@ #include #if IS_ENABLED(CONFIG_OF_MDIO) -extern bool of_mdiobus_child_is_phy(struct device_node *child); -extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); -extern struct phy_device *of_phy_find_device(struct device_node *phy_np); -extern struct phy_device *of_phy_connect(struct net_device *dev, - struct device_node *phy_np, - void (*hndlr)(struct net_device *), - u32 flags, phy_interface_t iface); -extern struct phy_device * +bool of_mdiobus_child_is_phy(struct device_node *child); +int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); +struct phy_device *of_phy_find_device(struct device_node *phy_np); +struct phy_device * +of_phy_connect(struct net_device *dev, struct device_node *phy_np, + void (*hndlr)(struct net_device *), u32 flags, + phy_interface_t iface); +struct phy_device * of_phy_get_and_connect(struct net_device *dev, struct device_node *np, void (*hndlr)(struct net_device *)); -struct phy_device *of_phy_attach(struct net_device *dev, - struct device_node *phy_np, u32 flags, - phy_interface_t iface); - -extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); -extern int of_phy_register_fixed_link(struct device_node *np); -extern void of_phy_deregister_fixed_link(struct device_node *np); -extern bool of_phy_is_fixed_link(struct device_node *np); -extern int of_mdiobus_phy_device_register(struct mii_bus *mdio, - struct phy_device *phy, - struct device_node *child, u32 addr); +struct phy_device * +of_phy_attach(struct net_device *dev, struct device_node *phy_np, + u32 flags, phy_interface_t iface); + +struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +int of_phy_register_fixed_link(struct device_node *np); +void of_phy_deregister_fixed_link(struct device_node *np); +bool of_phy_is_fixed_link(struct device_node *np); +int of_mdiobus_phy_device_register(struct mii_bus *mdio, struct phy_device *phy, + struct device_node *child, u32 addr); static inline int of_mdio_parse_addr(struct device *dev, const struct device_node *np) -- cgit v1.2.3 From 14eeb6e086d6b9004c600e5f0f62bacb458ecfba Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 29 Jun 2020 14:03:45 +0200 Subject: of: mdio: provide devm_of_mdiobus_register() Implement a managed variant of of_mdiobus_register(). We need to make mdio_devres into its own module because otherwise we'd hit circular sumbol dependencies between phylib and of_mdio. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index ba8e157f24ad..1efb88d9f892 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -8,12 +8,15 @@ #ifndef __LINUX_OF_MDIO_H #define __LINUX_OF_MDIO_H +#include #include #include #if IS_ENABLED(CONFIG_OF_MDIO) bool of_mdiobus_child_is_phy(struct device_node *child); int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); +int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * of_phy_connect(struct net_device *dev, struct device_node *phy_np, -- cgit v1.2.3 From d141b8bc5773cbbaf5b8530f08f94fc10fff9e8c Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Jun 2020 23:28:43 -0700 Subject: perf: Expose get/put_callchain_entry() Sanitize and expose get/put_callchain_entry(). This would be used by bpf stack map. Suggested-by: Peter Zijlstra Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630062846.664389-2-songliubraving@fb.com --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4bb32082342..00ab5efa3833 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1244,6 +1244,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); +extern struct perf_callchain_entry *get_callchain_entry(int *rctx); +extern void put_callchain_entry(int rctx); extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; -- cgit v1.2.3 From fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Jun 2020 23:28:44 -0700 Subject: bpf: Introduce helper bpf_get_task_stack() Introduce helper bpf_get_task_stack(), which dumps stack trace of given task. This is different to bpf_get_stack(), which gets stack track of current task. One potential use case of bpf_get_task_stack() is to call it from bpf_iter__task and dump all /proc//stack to a seq_file. bpf_get_task_stack() uses stack_trace_save_tsk() instead of get_perf_callchain() for kernel stack. The benefit of this choice is that stack_trace_save_tsk() doesn't require changes in arch/. The downside of using stack_trace_save_tsk() is that stack_trace_save_tsk() dumps the stack trace to unsigned long array. For 32-bit systems, we need to translate it to u64 array. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3d2ade703a35..0cd7f6884c5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1627,6 +1627,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0cb8ec948816..da9bf35a26f8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3285,6 +3285,39 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *task*, which is a valid + * pointer to struct task_struct. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3427,7 +3460,9 @@ union bpf_attr { FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), + FN(skc_to_udp6_sock), \ + FN(get_task_stack), \ + /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 2cef30d7bd8b8fbddeb74e3753c29d4248c094e0 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Mon, 29 Jun 2020 16:13:27 +0300 Subject: xen: netif.h: add a new extra type for XDP The patch adds a new extra type to be able to diffirentiate between RX responses on xen-netfront side with the adjusted offset required for XDP processing. The offset value from a guest is passed via xenstore. Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- include/xen/interface/io/netif.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index 4f20dbc42910..2194322c3c7f 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -160,6 +160,19 @@ * be applied if it is set. */ +/* + * "xdp-headroom" is used to request that extra space is added + * for XDP processing. The value is measured in bytes and passed by + * the frontend to be consistent between both ends. + * If the value is greater than zero that means that + * an RX response is going to be passed to an XDP program for processing. + * XEN_NETIF_MAX_XDP_HEADROOM defines the maximum headroom offset in bytes + * + * "feature-xdp-headroom" is set to "1" by the netback side like other features + * so a guest can check if an XDP program can be processed. + */ +#define XEN_NETIF_MAX_XDP_HEADROOM 0x7FFF + /* * Control ring * ============ @@ -846,7 +859,8 @@ struct xen_netif_tx_request { #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ -#define XEN_NETIF_EXTRA_TYPE_MAX (5) +#define XEN_NETIF_EXTRA_TYPE_XDP (5) /* u.xdp */ +#define XEN_NETIF_EXTRA_TYPE_MAX (6) /* xen_netif_extra_info_t flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) @@ -879,6 +893,10 @@ struct xen_netif_extra_info { uint8_t algorithm; uint8_t value[4]; } hash; + struct { + uint16_t headroom; + uint16_t pad[2]; + } xdp; uint16_t pad[3]; } u; }; -- cgit v1.2.3 From a3b658cfb66497525278cbf852913a04dbaae992 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 30 Jun 2020 14:49:41 -0400 Subject: bonding: allow xfrm offload setup post-module-load At the moment, bonding xfrm crypto offload can only be set up if the bonding module is loaded with active-backup mode already set. We need to be able to make this work with bonds set to AB after the bonding driver has already been loaded. So what's done here is: 1) move #define BOND_XFRM_FEATURES to net/bonding.h so it can be used by both bond_main.c and bond_options.c 2) set BOND_XFRM_FEATURES in bond_dev->hw_features universally, rather than only when loading in AB mode 3) wire up xfrmdev_ops universally too 4) disable BOND_XFRM_FEATURES in bond_dev->features if not AB 5) exit early (non-AB case) from bond_ipsec_offload_ok, to prevent a performance hit from traversing into the underlying drivers 5) toggle BOND_XFRM_FEATURES in bond_dev->wanted_features and call netdev_change_features() from bond_option_mode_set() In my local testing, I can change bonding modes back and forth on the fly, have hardware offload work when I'm in AB, and see no performance penalty to non-AB software encryption, despite having xfrm bits all wired up for all modes now. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Reported-by: Huy Nguyen CC: Saeed Mahameed CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org CC: intel-wired-lan@lists.osuosl.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/net/bonding.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index a00e1764e9b1..7d132cc1e584 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -86,6 +86,11 @@ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) +#ifdef CONFIG_XFRM_OFFLOAD +#define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ + NETIF_F_GSO_ESP) +#endif /* CONFIG_XFRM_OFFLOAD */ + #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; -- cgit v1.2.3 From e4266b991fead8eb996688e82ff39f6cc59ef7dd Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:05 +0200 Subject: bridge: uapi: mrp: Extend MRP attributes to get the status Add MRP attribute IFLA_BRIDGE_MRP_INFO to allow the userspace to get the current state of the MRP instances. This is a nested attribute that contains other attributes like, ring id, index of primary and secondary port, priority, ring state, ring role. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index caa6914a3e53..c114c1c2bd53 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -166,6 +166,7 @@ enum { IFLA_BRIDGE_MRP_RING_STATE, IFLA_BRIDGE_MRP_RING_ROLE, IFLA_BRIDGE_MRP_START_TEST, + IFLA_BRIDGE_MRP_INFO, __IFLA_BRIDGE_MRP_MAX, }; @@ -228,6 +229,22 @@ enum { #define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1) +enum { + IFLA_BRIDGE_MRP_INFO_UNSPEC, + IFLA_BRIDGE_MRP_INFO_RING_ID, + IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + IFLA_BRIDGE_MRP_INFO_PRIO, + IFLA_BRIDGE_MRP_INFO_RING_STATE, + IFLA_BRIDGE_MRP_INFO_RING_ROLE, + IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + __IFLA_BRIDGE_MRP_INFO_MAX, +}; + +#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; -- cgit v1.2.3 From 36a8e8e26542056bbd7eb5e047cadee30587d230 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:07 +0200 Subject: bridge: Extend br_fill_ifinfo to return MPR status This patch extends the function br_fill_ifinfo to return also the MRP status for each instance on a bridge. It also adds a new filter RTEXT_FILTER_MRP to return the MRP status only when this is set, not to interfer with the vlans. The MRP status is return only on the bridge interfaces. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 879e64950a0a..9b814c92de12 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -778,6 +778,7 @@ enum { #define RTEXT_FILTER_BRVLAN (1 << 1) #define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) #define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) /* End of information exported to user level */ -- cgit v1.2.3 From f0a5e4d7a594e0fe237d3dfafb069bb82f80f42f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Jul 2020 18:17:19 +0300 Subject: ipvs: allow connection reuse for unconfirmed conntrack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit YangYuxi is reporting that connection reuse is causing one-second delay when SYN hits existing connection in TIME_WAIT state. Such delay was added to give time to expire both the IPVS connection and the corresponding conntrack. This was considered a rare case at that time but it is causing problem for some environments such as Kubernetes. As nf_conntrack_tcp_packet() can decide to release the conntrack in TIME_WAIT state and to replace it with a fresh NEW conntrack, we can use this to allow rescheduling just by tuning our check: if the conntrack is confirmed we can not schedule it to different real server and the one-second delay still applies but if new conntrack was created, we are free to select new real server without any delays. YangYuxi lists some of the problem reports: - One second connection delay in masquerading mode: https://marc.info/?t=151683118100004&r=1&w=2 - IPVS low throughput #70747 https://github.com/kubernetes/kubernetes/issues/70747 - Apache Bench can fill up ipvs service proxy in seconds #544 https://github.com/cloudnativelabs/kube-router/issues/544 - Additional 1s latency in `host -> service IP -> pod` https://github.com/kubernetes/kubernetes/issues/90854 Fixes: f719e3754ee2 ("ipvs: drop first packet to redirect conntrack") Co-developed-by: YangYuxi Signed-off-by: YangYuxi Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0c9881241323..011f407b76fe 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1626,18 +1626,16 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) } #endif /* CONFIG_IP_VS_NFCT */ -/* Really using conntrack? */ -static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, - struct sk_buff *skb) +/* Using old conntrack that can not be redirected to another real server? */ +static inline bool ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp, + struct sk_buff *skb) { #ifdef CONFIG_IP_VS_NFCT enum ip_conntrack_info ctinfo; struct nf_conn *ct; - if (!(cp->flags & IP_VS_CONN_F_NFCT)) - return false; ct = nf_ct_get(skb, &ctinfo); - if (ct) + if (ct && nf_ct_is_confirmed(ct)) return true; #endif return false; -- cgit v1.2.3 From 74cccc3d38438b346e40a4f8133cff3f0839ff84 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:11 +0200 Subject: netfilter: nf_tables: add NFTA_CHAIN_ID attribute This netlink attribute allows you to refer to chains inside a transaction as an alternative to the name and the handle. The chain binding support requires this new chain ID approach. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6f0f6fca9ac3..3e5226684017 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1433,6 +1433,7 @@ struct nft_trans_chain { char *name; struct nft_stats __percpu *stats; u8 policy; + u32 chain_id; }; #define nft_trans_chain_update(trans) \ @@ -1443,6 +1444,8 @@ struct nft_trans_chain { (((struct nft_trans_chain *)trans->data)->stats) #define nft_trans_chain_policy(trans) \ (((struct nft_trans_chain *)trans->data)->policy) +#define nft_trans_chain_id(trans) \ + (((struct nft_trans_chain *)trans->data)->chain_id) struct nft_trans_table { bool update; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4565456c0ef4..477779595b78 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -196,6 +196,7 @@ enum nft_table_attributes { * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) * @NFTA_CHAIN_FLAGS: chain flags + * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -209,6 +210,7 @@ enum nft_chain_attributes { NFTA_CHAIN_COUNTERS, NFTA_CHAIN_PAD, NFTA_CHAIN_FLAGS, + NFTA_CHAIN_ID, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -- cgit v1.2.3 From 837830a4b439bfeb86c70b0115c280377c84714b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:16 +0200 Subject: netfilter: nf_tables: add NFTA_RULE_CHAIN_ID attribute This new netlink attribute allows you to add rules to chains by the chain ID. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 477779595b78..2304d1b7ba5e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -240,6 +240,7 @@ enum nft_rule_attributes { NFTA_RULE_PAD, NFTA_RULE_ID, NFTA_RULE_POSITION_ID, + NFTA_RULE_CHAIN_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) -- cgit v1.2.3 From 51d70f181ff4e2c996ddf256af1efecd7d5864e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:21 +0200 Subject: netfilter: nf_tables: add NFTA_VERDICT_CHAIN_ID attribute This netlink attribute allows you to identify the chain to jump/goto by means of the chain ID. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2304d1b7ba5e..683e75126d68 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -471,11 +471,13 @@ enum nft_data_attributes { * * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + * @NFTA_VERDICT_CHAIN_ID: jump target chain ID (NLA_U32) */ enum nft_verdict_attributes { NFTA_VERDICT_UNSPEC, NFTA_VERDICT_CODE, NFTA_VERDICT_CHAIN, + NFTA_VERDICT_CHAIN_ID, __NFTA_VERDICT_MAX }; #define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) -- cgit v1.2.3 From 67c49de4ad862c567088c5119cf125e566f56e7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:25 +0200 Subject: netfilter: nf_tables: expose enum nft_chain_flags through UAPI This enum definition was never exposed through UAPI. Rename NFT_BASE_CHAIN to NFT_CHAIN_BASE for consistency. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +------ include/uapi/linux/netfilter/nf_tables.h | 5 +++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3e5226684017..6d1e7da6e00a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -921,11 +921,6 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, (expr) != (last); \ (expr) = nft_expr_next(expr)) -enum nft_chain_flags { - NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_HW_OFFLOAD = 0x2, -}; - #define NFT_CHAIN_POLICY_UNSET U8_MAX /** @@ -1036,7 +1031,7 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai static inline bool nft_is_base_chain(const struct nft_chain *chain) { - return chain->flags & NFT_BASE_CHAIN; + return chain->flags & NFT_CHAIN_BASE; } int __nft_release_basechain(struct nft_ctx *ctx); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 683e75126d68..2cf7cc3b50c1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -184,6 +184,11 @@ enum nft_table_attributes { }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) +enum nft_chain_flags { + NFT_CHAIN_BASE = (1 << 0), + NFT_CHAIN_HW_OFFLOAD = (1 << 1), +}; + /** * enum nft_chain_attributes - nf_tables chain netlink attributes * -- cgit v1.2.3 From d0e2c7de92c7f2b3d355ad76b0bb9fc43d1beb87 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:36 +0200 Subject: netfilter: nf_tables: add NFT_CHAIN_BINDING This new chain flag specifies that: * the kernel dynamically allocates the chain name, if no chain name is specified. * If the immediate expression that refers to this chain is removed, then this bound chain (and its content) is destroyed. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 13 ++++++++++++- include/uapi/linux/netfilter/nf_tables.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6d1e7da6e00a..822c26766330 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -899,6 +899,8 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) return (void *)&rule->data[rule->dlen]; } +void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule); + static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -944,7 +946,8 @@ struct nft_chain { struct nft_table *table; u64 handle; u32 use; - u8 flags:6, + u8 flags:5, + bound:1, genmask:2; char *name; @@ -989,6 +992,14 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); +static inline bool nft_chain_is_bound(struct nft_chain *chain) +{ + return (chain->flags & NFT_CHAIN_BINDING) && chain->bound; +} + +void nft_chain_del(struct nft_chain *chain); +void nf_tables_chain_destroy(struct nft_ctx *ctx); + struct nft_stats { u64 bytes; u64 pkts; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2cf7cc3b50c1..e00b4ae6174e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -187,6 +187,7 @@ enum nft_table_attributes { enum nft_chain_flags { NFT_CHAIN_BASE = (1 << 0), NFT_CHAIN_HW_OFFLOAD = (1 << 1), + NFT_CHAIN_BINDING = (1 << 2), }; /** -- cgit v1.2.3 From c1f79a2eefdcc0aef5d7a911c27a3f75f1936ecd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Jul 2020 02:51:28 +0200 Subject: netfilter: nf_tables: reject unsupported chain flags Bail out if userspace sends unsupported chain flags. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e00b4ae6174e..42f351c1f5c5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -189,6 +189,9 @@ enum nft_chain_flags { NFT_CHAIN_HW_OFFLOAD = (1 << 1), NFT_CHAIN_BINDING = (1 << 2), }; +#define NFT_CHAIN_FLAGS (NFT_CHAIN_BASE | \ + NFT_CHAIN_HW_OFFLOAD | \ + NFT_CHAIN_BINDING) /** * enum nft_chain_attributes - nf_tables chain netlink attributes -- cgit v1.2.3 From b1c7b87443c2c12c4fe095114736b8ad6f963f67 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 5 Jul 2020 19:16:22 +0300 Subject: net: dsa: felix: support half-duplex link modes Ping tested: [ 11.808455] mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Full - flow control rx/tx [ 11.816497] IPv6: ADDRCONF(NETDEV_CHANGE): swp0: link becomes ready [root@LS1028ARDB ~] # ethtool -s swp0 advertise 0x4 [ 18.844591] mscc_felix 0000:00:00.5 swp0: Link is Down [ 22.048337] mscc_felix 0000:00:00.5 swp0: Link is Up - 100Mbps/Half - flow control off [root@LS1028ARDB ~] # ip addr add 192.168.1.1/24 dev swp0 [root@LS1028ARDB ~] # ping 192.168.1.2 PING 192.168.1.2 (192.168.1.2): 56 data bytes (...) ^C--- 192.168.1.2 ping statistics --- 3 packets transmitted, 3 packets received, 0% packet loss round-trip min/avg/max = 0.383/0.611/1.051 ms [root@LS1028ARDB ~] # ethtool -s swp0 advertise 0x10 [ 355.637747] mscc_felix 0000:00:00.5 swp0: Link is Down [ 358.788034] mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Half - flow control off [root@LS1028ARDB ~] # ping 192.168.1.2 PING 192.168.1.2 (192.168.1.2): 56 data bytes (...) ^C --- 192.168.1.2 ping statistics --- 16 packets transmitted, 16 packets received, 0% packet loss round-trip min/avg/max = 0.301/0.384/1.138 ms Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/linux/fsl/enetc_mdio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h index 4875dd38af7e..2d9203314865 100644 --- a/include/linux/fsl/enetc_mdio.h +++ b/include/linux/fsl/enetc_mdio.h @@ -15,6 +15,7 @@ #define ENETC_PCS_IF_MODE_SGMII_EN BIT(0) #define ENETC_PCS_IF_MODE_USE_SGMII_AN BIT(1) #define ENETC_PCS_IF_MODE_SGMII_SPEED(x) (((x) << 2) & GENMASK(3, 2)) +#define ENETC_PCS_IF_MODE_DUPLEX_HALF BIT(3) /* Not a mistake, the SerDes PLL needs to be set at 3.125 GHz by Reset * Configuration Word (RCW, outside Linux control) for 2.5G SGMII mode. The PCS -- cgit v1.2.3 From 5ab903418ad14732131df0af0d63f19b73e377ae Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 6 Jul 2020 18:38:19 +0300 Subject: net: qed: sanitize BE/LE data processing Current code assumes that both host and device operates in Little Endian in lots of places. While this is true for x86 platform, this doesn't mean we should not care about this. This commit addresses all parts of the code that were pointed out by sparse checker. All operations with restricted (__be*/__le*) types are now protected with explicit from/to CPU conversions, even if they're noops on common setups. I'm sure there are more such places, but this implies a deeper code investigation, and is a subject for future works. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5ca081cd2ed9..90e1060da02b 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1403,16 +1403,15 @@ static inline void qed_sb_ack(struct qed_sb_info *sb_info, enum igu_int_cmd int_cmd, u8 upd_flg) { - struct igu_prod_cons_update igu_ack = { 0 }; + u32 igu_ack; - igu_ack.sb_id_and_flags = - ((sb_info->sb_ack << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) | - (upd_flg << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) | - (int_cmd << IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT) | - (IGU_SEG_ACCESS_REG << - IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT)); + igu_ack = ((sb_info->sb_ack << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) | + (upd_flg << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) | + (int_cmd << IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT) | + (IGU_SEG_ACCESS_REG << + IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT)); - DIRECT_REG_WR(sb_info->igu_addr, igu_ack.sb_id_and_flags); + DIRECT_REG_WR(sb_info->igu_addr, igu_ack); /* Both segments (interrupts & acks) are written to same place address; * Need to guarantee all commands will be received (in-order) by HW. -- cgit v1.2.3 From 49b020c1d236a36a4533e7db6d2604cb57ed4c51 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 29 Jun 2020 16:11:00 +0000 Subject: Bluetooth: Adding a configurable autoconnect timeout This patch adds a configurable LE autoconnect timeout. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 836dc997ff94..34ad5b207598 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -376,6 +376,7 @@ struct hci_dev { __u16 def_br_lsto; __u16 def_page_timeout; __u16 def_multi_adv_rotation_duration; + __u16 def_le_autoconnect_timeout; __u16 pkt_type; __u16 esco_type; -- cgit v1.2.3 From 3970ed49a46bd0b062870edcc0894b2bd820371d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 7 Jul 2020 03:49:35 +0200 Subject: net: phy: Properly define genphy_c45_driver Avoid the W=1 warning that symbol 'genphy_c45_driver' was not declared. Should it be static? Declare it on the phy header file. Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 101a48fa6750..1592c3d0e12f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1385,6 +1385,9 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); +/* Generic C45 PHY driver */ +extern struct phy_driver genphy_c45_driver; + /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); -- cgit v1.2.3 From 4895d7808e7030c831f2ef83a3cc6ec0d46c30b1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Jul 2020 21:27:56 -0700 Subject: net: ethtool: Introduce ethtool_phy_ops In order to decouple ethtool from its PHY library dependency, define an ethtool_phy_ops singleton which can be overriden by the PHY library when it loads with an appropriate set of function pointers. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ethtool.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 48ad3b6a0150..0c139a93b67a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -502,5 +502,30 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +struct netlink_ext_ack; +struct phy_device; +struct phy_tdr_config; + +/** + * struct ethtool_phy_ops - Optional PHY device options + * @start_cable_test - Start a cable test + * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test + * + * All operations are optional (i.e. the function pointer may be set to %NULL) + * and callers must take this into account. Callers must hold the RTNL lock. + */ +struct ethtool_phy_ops { + int (*start_cable_test)(struct phy_device *phydev, + struct netlink_ext_ack *extack); + int (*start_cable_test_tdr)(struct phy_device *phydev, + struct netlink_ext_ack *extack, + const struct phy_tdr_config *config); +}; + +/** + * ethtool_set_ethtool_phy_ops - Set the ethtool_phy_ops singleton + * @ops: Ethtool PHY operations to set + */ +void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops); #endif /* _LINUX_ETHTOOL_H */ -- cgit v1.2.3 From f5836749c9c04a10decd2742845ad4870965fdef Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 6 Jul 2020 16:01:25 -0700 Subject: bpf: Add BPF_CGROUP_INET_SOCK_RELEASE hook Sometimes it's handy to know when the socket gets freed. In particular, we'd like to try to use a smarter allocation of ports for bpf_bind and explore the possibility of limiting the number of SOCK_DGRAM sockets the process can have. Implement BPF_CGROUP_INET_SOCK_RELEASE hook that triggers on inet socket release. It triggers only for userspace sockets (not in-kernel ones) and therefore has the same semantics as the existing BPF_CGROUP_INET_SOCK_CREATE. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200706230128.4073544-2-sdf@google.com --- include/linux/bpf-cgroup.h | 4 ++++ include/uapi/linux/bpf.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c66c545e161a..2c6f26670acc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -210,6 +210,9 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE) +#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \ + BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE) + #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND) @@ -401,6 +404,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da9bf35a26f8..548a749aebb3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -226,6 +226,7 @@ enum bpf_attach_type { BPF_CGROUP_INET4_GETSOCKNAME, BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, + BPF_CGROUP_INET_SOCK_RELEASE, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From bd36ed1c935144a0e3b788bba659258f666e7b5b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2020 09:46:24 -0700 Subject: net: phy: Define PHY statistics ethtool_phy_ops Extend ethtool_phy_ops to include the 3 function pointers necessary for implementing PHY statistics. In a subsequent change we will uninline those functions. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0c139a93b67a..969a80211df6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -508,6 +508,9 @@ struct phy_tdr_config; /** * struct ethtool_phy_ops - Optional PHY device options + * @get_sset_count: Get number of strings that @get_strings will write. + * @get_strings: Return a set of strings that describe the requested objects + * @get_stats: Return extended statistics about the PHY device. * @start_cable_test - Start a cable test * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test * @@ -515,6 +518,10 @@ struct phy_tdr_config; * and callers must take this into account. Callers must hold the RTNL lock. */ struct ethtool_phy_ops { + int (*get_sset_count)(struct phy_device *dev); + int (*get_strings)(struct phy_device *dev, u8 *data); + int (*get_stats)(struct phy_device *dev, + struct ethtool_stats *stats, u64 *data); int (*start_cable_test)(struct phy_device *phydev, struct netlink_ext_ack *extack); int (*start_cable_test_tdr)(struct phy_device *phydev, -- cgit v1.2.3 From 17809516a03a045565ad6a80e6241754615871ac Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2020 09:46:25 -0700 Subject: net: phy: Uninline PHY ethtool statistics operations Now that we have moved the PHY ethtool statistics to be dynamically registered, we no longer need to inline those for ethtool. This used to be done to avoid cross symbol referencing and allow ethtool to be decoupled from PHYLIB entirely. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 49 ++++--------------------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1592c3d0e12f..0403eb799913 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1474,51 +1474,10 @@ int __init mdio_bus_init(void); void mdio_bus_exit(void); #endif -/* Inline function for use within net/core/ethtool.c (built-in) */ -static inline int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data) -{ - if (!phydev->drv) - return -EIO; - - mutex_lock(&phydev->lock); - phydev->drv->get_strings(phydev, data); - mutex_unlock(&phydev->lock); - - return 0; -} - -static inline int phy_ethtool_get_sset_count(struct phy_device *phydev) -{ - int ret; - - if (!phydev->drv) - return -EIO; - - if (phydev->drv->get_sset_count && - phydev->drv->get_strings && - phydev->drv->get_stats) { - mutex_lock(&phydev->lock); - ret = phydev->drv->get_sset_count(phydev); - mutex_unlock(&phydev->lock); - - return ret; - } - - return -EOPNOTSUPP; -} - -static inline int phy_ethtool_get_stats(struct phy_device *phydev, - struct ethtool_stats *stats, u64 *data) -{ - if (!phydev->drv) - return -EIO; - - mutex_lock(&phydev->lock); - phydev->drv->get_stats(phydev, stats, data); - mutex_unlock(&phydev->lock); - - return 0; -} +int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data); +int phy_ethtool_get_sset_count(struct phy_device *phydev); +int phy_ethtool_get_stats(struct phy_device *phydev, + struct ethtool_stats *stats, u64 *data); static inline int phy_package_read(struct phy_device *phydev, u32 regnum) { -- cgit v1.2.3 From 065e0d42a0a728d7f6c2aec7c9f3e5dc7b715394 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 6 Jul 2020 20:42:32 -0700 Subject: ethtool: Add support for 100Gbps per lane link modes Define 100G, 200G and 400G link modes using 100Gbps per lane LR, ER and FR are defined as a single link mode because they are using same technology and by design are fully interoperable. EEPROM content indicates if the module is LR, ER, or FR, and the user space ethtool decoder is planned to support decoding these modes in the EEPROM. Signed-off-by: Meir Lichtinger CC: Andrew Lunn Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1413538ef30..60856e0f9618 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1600,6 +1600,21 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; -- cgit v1.2.3 From 12fdafb817c6cde87a4fa0e674e66b0226a0889d Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 6 Jul 2020 20:42:33 -0700 Subject: net/mlx5: Added support for 100Gbps per lane link modes This patch exposes new link modes using 100Gbps per lane, including 100G, 200G and 400G modes. Signed-off-by: Meir Lichtinger Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index de9a272c9f3d..2d45a6af52a4 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -104,8 +104,11 @@ enum mlx5e_ext_link_mode { MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR = 8, MLX5E_CAUI_4_100GBASE_CR4_KR4 = 9, MLX5E_100GAUI_2_100GBASE_CR2_KR2 = 10, + MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, + MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, MLX5E_EXT_LINK_MODES_NUMBER, }; -- cgit v1.2.3 From efd7fe68f0c6c9649757bf80cbc382fd21e764c9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jul 2020 14:25:36 +0200 Subject: net: dsa: tag_rtl4_a: Implement Realtek 4 byte A tag This implements the known parts of the Realtek 4 byte tag protocol version 0xA, as found in the RTL8366RB DSA switch. It is designated as protocol version 0xA as a different Realtek 4 byte tag format with protocol version 0x9 is known to exist in the Realtek RTL8306 chips. The tag and switch chip lacks public documentation, so the tag format has been reverse-engineered from packet dumps. As only ingress traffic has been available for analysis an egress tag has not been possible to develop (even using educated guesses about bit fields) so this is as far as it gets. It is not known if the switch even supports egress tagging. Excessive attempts to figure out the egress tag format was made. When nothing else worked, I just tried all bit combinations with 0xannp where a is protocol and p is port. I looped through all values several times trying to get a response from ping, without any positive result. Using just these ingress tags however, the switch functionality is vastly improved and the packets find their way into the destination port without any tricky VLAN configuration. On the D-Link DIR-685 the LAN ports now come up and respond to ping without any command line configuration so this is a real improvement for users. Egress packets need to be restricted to the proper target ports using VLAN, which the RTL8366RB DSA switch driver already sets up. Cc: DENG Qingfang Cc: Mauri Sandberg Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4046ccd1945d..b28c95c76762 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -44,6 +44,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_KSZ8795_VALUE 14 #define DSA_TAG_PROTO_OCELOT_VALUE 15 #define DSA_TAG_PROTO_AR9331_VALUE 16 +#define DSA_TAG_PROTO_RTL4_A_VALUE 17 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -63,6 +64,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, + DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, }; struct packet_type; -- cgit v1.2.3 From 1475ee0ac9a16dd5df23ca8abe1039eb6086eb66 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:29 +0800 Subject: xfrm: add is_ipip to struct xfrm_input_afinfo This patch is to add a new member is_ipip to struct xfrm_input_afinfo, to allow another group family of callback functions to be registered with is_ipip set. This will be used for doing a callback for struct xfrm(6)_tunnel of ipip/ipv6 tunnels in xfrm_input() by calling xfrm_rcv_cb(), which is needed by ipip/ipv6 tunnels' support in ip(6)_vti and xfrm interface in the next patches. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e20b2b27ec48..4666bc9e59ab 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -373,7 +373,8 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { - unsigned int family; + u8 family; + bool is_ipip; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; -- cgit v1.2.3 From 6df2db5d37ba3df8c80d90c15f1e20480be43f75 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:30 +0800 Subject: tunnel4: add cb_handler to struct xfrm_tunnel This patch is to register a callback function tunnel4_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel4 and tunnel64. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel4_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4666bc9e59ab..c1ec6294d773 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1416,6 +1416,7 @@ struct xfrm6_protocol { /* XFRM tunnel handlers. */ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, u32 info); struct xfrm_tunnel __rcu *next; -- cgit v1.2.3 From 86afc7031826147407e96412668d343e0f1bd6fd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:31 +0800 Subject: tunnel6: add tunnel6_input_afinfo for ipip and ipv6 tunnels This patch is to register a callback function tunnel6_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel6 and tunnel46. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET6 and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel6_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c1ec6294d773..83a532dda1bd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1425,6 +1425,7 @@ struct xfrm_tunnel { struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); struct xfrm6_tunnel __rcu *next; -- cgit v1.2.3 From 3f935c75eb52dd968351dba824adf466fb9c9429 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:39 +0200 Subject: inet_diag: support for wider protocol numbers After commit bf9765145b85 ("sock: Make sk_protocol a 16-bit value") the current size of 'sdiag_protocol' is not sufficient to represent the possible protocol values. This change introduces a new inet diag request attribute to let user space specify the relevant protocol number using u32 values. The attribute is parsed by inet diag core on get/dump command and the extended protocol value, if available, is preferred to 'sdiag_protocol' to lookup the diag handler. The parse attributed are exposed to all the diag handlers via the cb->data. Note that inet_diag_dump_one_icsk() is left unmodified, as it will not be used by protocol using the extended attribute. Suggested-by: David S. Miller Co-developed-by: Christoph Paasch Signed-off-by: Christoph Paasch Acked-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index e6f183ee8417..5ba122c1949a 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -65,6 +65,7 @@ enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, INET_DIAG_REQ_SK_BPF_STORAGES, + INET_DIAG_REQ_PROTOCOL, __INET_DIAG_REQ_MAX, }; -- cgit v1.2.3 From ac3b45f6095452a9731f8825be1513d326dbfa15 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:41 +0200 Subject: mptcp: add MPTCP socket diag interface exposes basic inet socket attribute, plus some MPTCP socket fields comprising PM status and MPTCP-level sequence numbers. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 5f2c77082d9e..9762660df741 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -86,4 +86,21 @@ enum { __MPTCP_PM_CMD_AFTER_LAST }; +#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) +#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; +}; + #endif /* _UAPI_MPTCP_H */ -- cgit v1.2.3 From 10a429bab4462581bbda3fd7f41d4ec0ddc5e682 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:14 +0300 Subject: devlink: Move set attribute of devlink_port_attrs to devlink_port The struct devlink_port_attrs holds the attributes of devlink_port. The 'set' field is not devlink_port's attribute as opposed to most of the others. Move 'set' to be devlink_port's field called 'attrs_set'. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 428f55f8197c..28f8d92c5741 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -65,8 +65,7 @@ struct devlink_port_pci_vf_attrs { }; struct devlink_port_attrs { - u8 set:1, - split:1, + u8 split:1, switch_port:1; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; @@ -90,6 +89,7 @@ struct devlink_port { enum devlink_port_type desired_type; void *type_dev; struct devlink_port_attrs attrs; + u8 attrs_set:1; struct delayed_work type_warn_dw; }; -- cgit v1.2.3 From 46737a194945e540e3e2eb1fc870207928a9c2eb Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:15 +0300 Subject: devlink: Move switch_port attribute of devlink_port_attrs to devlink_port The struct devlink_port_attrs holds the attributes of devlink_port. Similarly to the previous patch, 'switch_port' attribute is another exception. Move 'switch_port' to be devlink_port's field. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 28f8d92c5741..de4b5dcdb4a5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -65,8 +65,7 @@ struct devlink_port_pci_vf_attrs { }; struct devlink_port_attrs { - u8 split:1, - switch_port:1; + u8 split:1; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; union { @@ -89,7 +88,8 @@ struct devlink_port { enum devlink_port_type desired_type; void *type_dev; struct devlink_port_attrs attrs; - u8 attrs_set:1; + u8 attrs_set:1, + switch_port:1; struct delayed_work type_warn_dw; }; -- cgit v1.2.3 From 71ad8d55f8e5ea101069b552422f392655e2ffb6 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:16 +0300 Subject: devlink: Replace devlink_port_attrs_set parameters with a struct Currently, devlink_port_attrs_set accepts a long list of parameters, that most of them are devlink port's attributes. Use the devlink_port_attrs struct to replace the relevant parameters. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index de4b5dcdb4a5..8f9db991192d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -52,7 +52,7 @@ struct devlink_port_phys_attrs { * A physical port which is visible to the user * for a given port flavour. */ - u32 split_subport_number; + u32 split_subport_number; /* If the port is split, this is the number of subport. */ }; struct devlink_port_pci_pf_attrs { @@ -64,6 +64,12 @@ struct devlink_port_pci_vf_attrs { u16 vf; /* Associated PCI VF for of the PCI PF for this port. */ }; +/** + * struct devlink_port_attrs - devlink port object + * @flavour: flavour of the port + * @split: indicates if this is split port + * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL + */ struct devlink_port_attrs { u8 split:1; enum devlink_port_flavour flavour; @@ -1180,17 +1186,9 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number, - const unsigned char *switch_id, - unsigned char switch_id_len); -void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf); + struct devlink_port_attrs *devlink_port_attrs); +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf, u16 vf); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, -- cgit v1.2.3 From a21cf0a8330bba60e44ca6c99e1591042f336ff5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:18 +0300 Subject: devlink: Add a new devlink port lanes attribute and pass to netlink Add a new devlink port attribute that indicates the port's number of lanes. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. The attribute is not passed to user space in case the number of lanes is invalid (0). Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8f9db991192d..91a9f8770d08 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -68,10 +68,12 @@ struct devlink_port_pci_vf_attrs { * struct devlink_port_attrs - devlink port object * @flavour: flavour of the port * @split: indicates if this is split port + * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL */ struct devlink_port_attrs { u8 split:1; + u32 lanes; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; union { diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 87c83a82991b..f741ab8d9cf0 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -455,6 +455,8 @@ enum devlink_attr { DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ + DEVLINK_ATTR_PORT_LANES, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a0f49b54865273c895be3826d6d59cbc5ad725c2 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:20 +0300 Subject: devlink: Add a new devlink port split ability attribute and pass to netlink Add a new attribute that indicates the split ability of devlink port. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 +++- include/uapi/linux/devlink.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 91a9f8770d08..746bed538664 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -68,11 +68,13 @@ struct devlink_port_pci_vf_attrs { * struct devlink_port_attrs - devlink port object * @flavour: flavour of the port * @split: indicates if this is split port + * @splittable: indicates if the port can be split. * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL */ struct devlink_port_attrs { - u8 split:1; + u8 split:1, + splittable:1; u32 lanes; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f741ab8d9cf0..cfef4245ea5a 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -456,6 +456,7 @@ enum devlink_attr { DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ DEVLINK_ATTR_PORT_LANES, /* u32 */ + DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From a2b992c828f7651db369ba8f0eb0818d70232636 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:44 -0700 Subject: debugfs: make sure we can remove u32_array files cleanly debugfs_create_u32_array() allocates a small structure to wrap the data and size information about the array. If users ever try to remove the file this leads to a leak since nothing ever frees this wrapper. That said there are no upstream users of debugfs_create_u32_array() that'd remove a u32 array file (we only have one u32 array user in CMA), so there is no real bug here. Make callers pass a wrapper they allocated. This way the lifetime management of the wrapper is on the caller, and we can avoid the potential leak in debugfs. CC: Chucheng Luo Signed-off-by: Jakub Kicinski Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/linux/debugfs.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 63cb3606dea7..851dd1f9a8a5 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -38,6 +38,11 @@ struct debugfs_regset32 { struct device *dev; /* Optional device for Runtime PM */ }; +struct debugfs_u32_array { + u32 *array; + u32 n_elements; +}; + extern struct dentry *arch_debugfs_dir; #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ @@ -136,7 +141,8 @@ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, u32 *array, u32 elements); + struct dentry *parent, + struct debugfs_u32_array *array); struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, struct dentry *parent, @@ -316,8 +322,8 @@ static inline bool debugfs_initialized(void) } static inline void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, u32 *array, - u32 elements) + struct dentry *parent, + struct debugfs_u32_array *array) { } -- cgit v1.2.3 From 84a4160e5a5951357947ad296932b433de3e34a0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:45 -0700 Subject: udp_tunnel: re-number the offload tunnel types Make it possible to use tunnel types as flags more easily. There doesn't appear to be any user using the type as an array index, so this should make no difference. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index e7312ceb2794..0615e25f041c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -106,9 +106,9 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { - UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ - UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ - UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */ + UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ + UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ + UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { -- cgit v1.2.3 From cc4e3835eff474aa274d6e1d18f69d9d296d3b76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:46 -0700 Subject: udp_tunnel: add central NIC RX port offload infrastructure Cater to devices which: (a) may want to sleep in the callbacks; (b) only have IPv4 support; (c) need all the programming to happen while the netdev is up. Drivers attach UDP tunnel offload info struct to their netdevs, where they declare how many UDP ports of various tunnel types they support. Core takes care of tracking which ports to offload. Use a fixed-size array since this matches what almost all drivers do, and avoids a complexity and uncertainty around memory allocations in an atomic context. Make sure that tunnel drivers don't try to replay the ports when new NIC netdev is registered. Automatic replays would mess up reference counting, and will be removed completely once all drivers are converted. v4: - use a #define NULL to avoid build issues with CONFIG_INET=n. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++ include/net/udp_tunnel.h | 137 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 39e28e11863c..ac2cd3f49aba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -65,6 +65,8 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct udp_tunnel_nic_info; +struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; @@ -1836,6 +1838,10 @@ enum netdev_priv_flags { * * @macsec_ops: MACsec offloading ops * + * @udp_tunnel_nic_info: static structure describing the UDP tunnel + * offload capabilities of the device + * @udp_tunnel_nic: UDP tunnel offload state + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2134,6 +2140,8 @@ struct net_device { /* MACsec management functions */ const struct macsec_ops *macsec_ops; #endif + const struct udp_tunnel_nic_info *udp_tunnel_nic_info; + struct udp_tunnel_nic *udp_tunnel_nic; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0615e25f041c..ee34619e4cfa 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -115,6 +115,7 @@ struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; + u8 hw_priv; }; /* Notify network devices of offloadable types */ @@ -181,4 +182,140 @@ static inline void udp_tunnel_encap_enable(struct socket *sock) udp_encap_enable(); } +#define UDP_TUNNEL_NIC_MAX_TABLES 4 + +enum udp_tunnel_nic_info_flags { + /* Device callbacks may sleep */ + UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), + /* Device only supports offloads when it's open, all ports + * will be removed before close and re-added after open. + */ + UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), + /* Device supports only IPv4 tunnels */ + UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), +}; + +/** + * struct udp_tunnel_nic_info - driver UDP tunnel offload information + * @set_port: callback for adding a new port + * @unset_port: callback for removing a port + * @sync_table: callback for syncing the entire port table at once + * @flags: device flags from enum udp_tunnel_nic_info_flags + * @tables: UDP port tables this device has + * @tables.n_entries: number of entries in this table + * @tables.tunnel_types: types of tunnels this table accepts + * + * Drivers are expected to provide either @set_port and @unset_port callbacks + * or the @sync_table callback. Callbacks are invoked with rtnl lock held. + * + * Known limitations: + * - UDP tunnel port notifications are fundamentally best-effort - + * it is likely the driver will both see skbs which use a UDP tunnel port, + * while not being a tunneled skb, and tunnel skbs from other ports - + * drivers should only use these ports for non-critical RX-side offloads, + * e.g. the checksum offload; + * - none of the devices care about the socket family at present, so we don't + * track it. Please extend this code if you care. + */ +struct udp_tunnel_nic_info { + /* one-by-one */ + int (*set_port)(struct net_device *dev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti); + int (*unset_port)(struct net_device *dev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti); + + /* all at once */ + int (*sync_table)(struct net_device *dev, unsigned int table); + + unsigned int flags; + + struct udp_tunnel_nic_table_info { + unsigned int n_entries; + unsigned int tunnel_types; + } tables[UDP_TUNNEL_NIC_MAX_TABLES]; +}; + +/* UDP tunnel module dependencies + * + * Tunnel drivers are expected to have a hard dependency on the udp_tunnel + * module. NIC drivers are not, they just attach their + * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. + * Loading a tunnel driver will cause the udp_tunnel module to be loaded + * and only then will all the required state structures be allocated. + * Since we want a weak dependency from the drivers and the core to udp_tunnel + * we call things through the following stubs. + */ +struct udp_tunnel_nic_ops { + void (*get_port)(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); + void (*set_port_priv)(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv); + void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); + void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); + void (*reset_ntf)(struct net_device *dev); +}; + +#ifdef CONFIG_INET +extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; +#else +#define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) +#endif + +static inline void +udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + /* This helper is used from .sync_table, we indicate empty entries + * by zero'ed @ti. Drivers which need to know the details of a port + * when it gets deleted should use the .set_port / .unset_port + * callbacks. + * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. + */ + memset(ti, 0, sizeof(*ti)); + + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->get_port(dev, table, idx, ti); +} + +static inline void +udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); +} + +static inline void +udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->add_port(dev, ti); +} + +static inline void +udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->del_port(dev, ti); +} + +/** + * udp_tunnel_nic_reset_ntf() - device-originating reset notification + * @dev: network interface device structure + * + * Called by the driver to inform the core that the entire UDP tunnel port + * state has been lost, usually due to device reset. Core will assume device + * forgot all the ports and issue .set_port and .sync_table callbacks as + * necessary. + * + * This function must be called with rtnl lock held, and will issue all + * the callbacks before returning. + */ +static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->reset_ntf(dev); +} #endif -- cgit v1.2.3 From c7d759eb7b12f91a25f4d3cd03ff5209046ddfc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:47 -0700 Subject: ethtool: add tunnel info interface Add an interface to report offloaded UDP ports via ethtool netlink. Now that core takes care of tracking which UDP tunnel ports the NICs are aware of we can quite easily export this information out to user space. The responsibility of writing the netlink dumps is split between ethtool code and udp_tunnel_nic.c - since udp_tunnel module may not always be loaded, yet we should always report the capabilities of the NIC. $ ethtool --show-tunnels eth0 Tunnel information for eth0: UDP port table 0: Size: 4 Types: vxlan No entries UDP port table 1: Size: 4 Types: geneve, vxlan-gpe Entries (1): port 1230, vxlan-gpe v4: - back to v2, build fix is now directly in udp_tunnel.h v3: - don't compile ETHTOOL_MSG_TUNNEL_INFO_GET in if CONFIG_INET not set. v2: - fix string set count, - reorder enums in the uAPI, - fix type of ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES to bitset in docs and comments. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/udp_tunnel.h | 21 ++++++++++++++ include/uapi/linux/ethtool.h | 2 ++ include/uapi/linux/ethtool_netlink.h | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index ee34619e4cfa..dd20ce99740c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -255,6 +255,10 @@ struct udp_tunnel_nic_ops { void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*reset_ntf)(struct net_device *dev); + + size_t (*dump_size)(struct net_device *dev, unsigned int table); + int (*dump_write)(struct net_device *dev, unsigned int table, + struct sk_buff *skb); }; #ifdef CONFIG_INET @@ -318,4 +322,21 @@ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->reset_ntf(dev); } + +static inline size_t +udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) +{ + if (!udp_tunnel_nic_ops) + return 0; + return udp_tunnel_nic_ops->dump_size(dev, table); +} + +static inline int +udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, + struct sk_buff *skb) +{ + if (!udp_tunnel_nic_ops) + return 0; + return udp_tunnel_nic_ops->dump_write(dev, table, skb); +} #endif diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 60856e0f9618..b4f2d134e713 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -669,6 +669,7 @@ enum ethtool_link_ext_substate_cable_issue { * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -686,6 +687,7 @@ enum ethtool_stringset { ETH_SS_SOF_TIMESTAMPING, ETH_SS_TS_TX_TYPES, ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, /* add new constants above here */ ETH_SS_COUNT diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c12ce4df4b6b..5dcd24cb33ea 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -41,6 +41,7 @@ enum { ETHTOOL_MSG_TSINFO_GET, ETHTOOL_MSG_CABLE_TEST_ACT, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -556,6 +557,60 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; +/* TUNNEL INFO */ + +enum { + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From f4f541660121aeb91a1b462ab3f3c5a86ab7c3dd Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:10 +0300 Subject: devlink: Implement devlink health reporters on per-port basis Add devlink-health reporter support on per-port basis. The main difference existing devlink-health is that port reporters are stored in per-devlink_port lists. Upon creation of such health reporter the reference to a port it belongs to is stored in reporter struct. Fill the port index attribute in devlink-health response to allow devlink userspace utility to distinguish between device and port reporters. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 746bed538664..bb1139752405 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -101,6 +101,8 @@ struct devlink_port { u8 attrs_set:1, switch_port:1; struct delayed_work type_warn_dw; + struct list_head reporter_list; + struct mutex reporters_lock; /* Protects reporter_list */ }; struct devlink_sb_pool_info { -- cgit v1.2.3 From 15c724b997a8fe1a677cf11797fb29c0bdecc63f Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:11 +0300 Subject: devlink: Add devlink health port reporters API In order to use new devlink port health reporters infrastructure, add corresponding constructor and destructor functions. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index bb1139752405..913e8679ae35 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1338,9 +1338,18 @@ struct devlink_health_reporter * devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, u64 graceful_period, void *priv); + +struct devlink_health_reporter * +devlink_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv); + void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); +void +devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter); + void * devlink_health_reporter_priv(struct devlink_health_reporter *reporter); int devlink_health_report(struct devlink_health_reporter *reporter, -- cgit v1.2.3 From 5a2798ab32ba2952cfe25701ee460bccbd434c75 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 11 Jul 2020 23:53:23 +0200 Subject: bpf: Add BTF_ID_LIST/BTF_ID/BTF_ID_UNUSED macros Adding support to generate .BTF_ids section that will hold BTF ID lists for verifier. Adding macros that will help to define lists of BTF ID values placed in .BTF_ids section. They are initially filled with zeros (during compilation) and resolved later during the linking phase by resolve_btfids tool. Following defines list of one BTF ID value: BTF_ID_LIST(bpf_skb_output_btf_ids) BTF_ID(struct, sk_buff) It also defines following variable to access the list: extern u32 bpf_skb_output_btf_ids[]; The BTF_ID_UNUSED macro defines 4 zero bytes. It's used when we want to define 'unused' entry in BTF_ID_LIST, like: BTF_ID_LIST(bpf_skb_output_btf_ids) BTF_ID(struct, sk_buff) BTF_ID_UNUSED BTF_ID(struct, task_struct) Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200711215329.41165-4-jolsa@kernel.org --- include/asm-generic/vmlinux.lds.h | 4 ++ include/linux/btf_ids.h | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 include/linux/btf_ids.h (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef218d7..0be2ee265931 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -641,6 +641,10 @@ __start_BTF = .; \ *(.BTF) \ __stop_BTF = .; \ + } \ + . = ALIGN(4); \ + .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ + *(.BTF_ids) \ } #else #define BTF diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h new file mode 100644 index 000000000000..fe019774f8a7 --- /dev/null +++ b/include/linux/btf_ids.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_BTF_IDS_H +#define _LINUX_BTF_IDS_H + +#include /* for __PASTE */ + +/* + * Following macros help to define lists of BTF IDs placed + * in .BTF_ids section. They are initially filled with zeros + * (during compilation) and resolved later during the + * linking phase by resolve_btfids tool. + * + * Any change in list layout must be reflected in resolve_btfids + * tool logic. + */ + +#define BTF_IDS_SECTION ".BTF_ids" + +#define ____BTF_ID(symbol) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".local " #symbol " ; \n" \ +".type " #symbol ", @object; \n" \ +".size " #symbol ", 4; \n" \ +#symbol ": \n" \ +".zero 4 \n" \ +".popsection; \n"); + +#define __BTF_ID(symbol) \ + ____BTF_ID(symbol) + +#define __ID(prefix) \ + __PASTE(prefix, __COUNTER__) + +/* + * The BTF_ID defines unique symbol for each ID pointing + * to 4 zero bytes. + */ +#define BTF_ID(prefix, name) \ + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__)) + +/* + * The BTF_ID_LIST macro defines pure (unsorted) list + * of BTF IDs, with following layout: + * + * BTF_ID_LIST(list1) + * BTF_ID(type1, name1) + * BTF_ID(type2, name2) + * + * list1: + * __BTF_ID__type1__name1__1: + * .zero 4 + * __BTF_ID__type2__name2__2: + * .zero 4 + * + */ +#define __BTF_ID_LIST(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".local " #name "; \n" \ +#name ":; \n" \ +".popsection; \n"); \ + +#define BTF_ID_LIST(name) \ +__BTF_ID_LIST(name) \ +extern u32 name[]; + +/* + * The BTF_ID_UNUSED macro defines 4 zero bytes. + * It's used when we want to define 'unused' entry + * in BTF_ID_LIST, like: + * + * BTF_ID_LIST(bpf_skb_output_btf_ids) + * BTF_ID(struct, sk_buff) + * BTF_ID_UNUSED + * BTF_ID(struct, task_struct) + */ + +#define BTF_ID_UNUSED \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".zero 4 \n" \ +".popsection; \n"); + + +#endif -- cgit v1.2.3 From 8aa5a33578e9685d06020bd10d1637557423e945 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:33 +0000 Subject: xsk: Add new statistics It can be useful for the user to know the reason behind a dropped packet. Introduce new counters which track drops on the receive path caused by: 1. rx ring being full 2. fill ring being empty Also, on the tx path introduce a counter which tracks the number of times we attempt pull from the tx ring when it is empty. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-2-ciara.loftus@intel.com --- include/net/xdp_sock.h | 4 ++++ include/uapi/linux/if_xdp.h | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 96bfc5f5f24e..c9d87cc40c11 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -69,7 +69,11 @@ struct xdp_sock { spinlock_t tx_completion_lock; /* Protects generic receive. */ spinlock_t rx_lock; + + /* Statistics */ u64 rx_dropped; + u64 rx_queue_full; + struct list_head map_list; /* Protects map_list */ spinlock_t map_list_lock; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index be328c59389d..a78a8096f4ce 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -73,9 +73,12 @@ struct xdp_umem_reg { }; struct xdp_statistics { - __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_dropped; /* Dropped for other reasons */ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 rx_ring_full; /* Dropped due to rx ring being full */ + __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */ + __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */ }; struct xdp_options { -- cgit v1.2.3 From 0d80cb4612aa32dc0faa17fa3ab6f96f33e2b4a7 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:35 +0000 Subject: xsk: Add xdp statistics to xsk_diag Add xdp statistics to the information dumped through the xsk_diag interface Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-4-ciara.loftus@intel.com --- include/uapi/linux/xdp_diag.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h index 78b2591a7782..66b9973b4f4c 100644 --- a/include/uapi/linux/xdp_diag.h +++ b/include/uapi/linux/xdp_diag.h @@ -30,6 +30,7 @@ struct xdp_diag_msg { #define XDP_SHOW_RING_CFG (1 << 1) #define XDP_SHOW_UMEM (1 << 2) #define XDP_SHOW_MEMINFO (1 << 3) +#define XDP_SHOW_STATS (1 << 4) enum { XDP_DIAG_NONE, @@ -41,6 +42,7 @@ enum { XDP_DIAG_UMEM_FILL_RING, XDP_DIAG_UMEM_COMPLETION_RING, XDP_DIAG_MEMINFO, + XDP_DIAG_STATS, __XDP_DIAG_MAX, }; @@ -69,4 +71,13 @@ struct xdp_diag_umem { __u32 refs; }; +struct xdp_diag_stats { + __u64 n_rx_dropped; + __u64 n_rx_invalid; + __u64 n_rx_full; + __u64 n_fill_ring_empty; + __u64 n_tx_invalid; + __u64 n_tx_ring_empty; +}; + #endif /* _LINUX_XDP_DIAG_H */ -- cgit v1.2.3 From ed757328c34015be4ec51861a90bb3bcc807ad58 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 12:24:18 +0200 Subject: atm: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- include/uapi/linux/atmioc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/atmioc.h b/include/uapi/linux/atmioc.h index cd7655e40c77..a9030bcc8d56 100644 --- a/include/uapi/linux/atmioc.h +++ b/include/uapi/linux/atmioc.h @@ -5,7 +5,7 @@ /* - * See http://icawww1.epfl.ch/linux-atm/magic.html for the complete list of + * See https://icawww1.epfl.ch/linux-atm/magic.html for the complete list of * "magic" ioctl numbers. */ -- cgit v1.2.3 From c40f4e50b6cfc7c66f69d12c6b3fbcd954f1ded5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 11 Jul 2020 00:55:03 +0300 Subject: net: sched: Pass qdisc reference in struct flow_block_offload Previously, shared blocks were only relevant for the pseudo-qdiscs ingress and clsact. Recently, a qevent facility was introduced, which allows to bind blocks to well-defined slots of a qdisc instance. RED in particular got two qevents: early_drop and mark. Drivers that wish to offload these blocks will be sent the usual notification, and need to know which qdisc it is related to. To that end, extend flow_block_offload with a "sch" pointer, and initialize as appropriate. This prompts changes in the indirect block facility, which now tracks the scheduler in addition to the netdevice. Update signatures of several functions similarly. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/flow_offload.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index de395498440d..9f88a7b730a8 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -444,6 +444,7 @@ struct flow_block_offload { struct list_head cb_list; struct list_head *driver_block_list; struct netlink_ext_ack *extack; + struct Qdisc *sch; }; enum tc_setup_type; @@ -455,6 +456,7 @@ struct flow_block_cb; struct flow_block_indr { struct list_head list; struct net_device *dev; + struct Qdisc *sch; enum flow_block_binder_type binder_type; void *data; void *cb_priv; @@ -479,7 +481,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv), struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, + struct Qdisc *sch, void *data, void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)); void flow_block_cb_free(struct flow_block_cb *block_cb); @@ -553,7 +556,7 @@ static inline void flow_block_init(struct flow_block *flow_block) INIT_LIST_HEAD(&flow_block->cb_list); } -typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, +typedef int flow_indr_block_bind_cb_t(struct net_device *dev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); @@ -561,7 +564,7 @@ typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv); void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, void (*release)(void *cb_priv)); -int flow_indr_dev_setup_offload(struct net_device *dev, +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); -- cgit v1.2.3 From 91c724cfc0cbc049f18c04634ad56080650e93b8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 13 Jul 2020 19:57:01 +0300 Subject: net: mscc: ocelot: convert port registers to regmap At the moment, there are some minimal register differences between VSC7514 Ocelot and VSC9959 Felix. To be precise, the PCS1G registers are missing from Felix because it was integrated with an NXP PCS. But with VSC9953 Seville (not yet introduced), the register differences are more pronounced. The MAC registers are located at different offsets within the DEV_GMII target. So we need to refactor the driver to keep a regmap even for per-port registers. The callers of the ocelot_port_readl and ocelot_port_writel were kept unchanged, only the implementation is now more generic. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 42 ++++++++++++++++++++++- include/soc/mscc/ocelot_dev.h | 78 ------------------------------------------- 2 files changed, 41 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index e050f8121ba2..c2a2d0165ef1 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -126,6 +126,7 @@ enum ocelot_target { HSIO, PTP, GCB, + DEV_GMII, TARGET_MAX, }; @@ -408,6 +409,45 @@ enum ocelot_reg { PTP_CLK_CFG_ADJ_CFG, PTP_CLK_CFG_ADJ_FREQ, GCB_SOFT_RST = GCB << TARGET_OFFSET, + DEV_CLOCK_CFG = DEV_GMII << TARGET_OFFSET, + DEV_PORT_MISC, + DEV_EVENTS, + DEV_EEE_CFG, + DEV_RX_PATH_DELAY, + DEV_TX_PATH_DELAY, + DEV_PTP_PREDICT_CFG, + DEV_MAC_ENA_CFG, + DEV_MAC_MODE_CFG, + DEV_MAC_MAXLEN_CFG, + DEV_MAC_TAGS_CFG, + DEV_MAC_ADV_CHK_CFG, + DEV_MAC_IFG_CFG, + DEV_MAC_HDX_CFG, + DEV_MAC_DBG_CFG, + DEV_MAC_FC_MAC_LOW_CFG, + DEV_MAC_FC_MAC_HIGH_CFG, + DEV_MAC_STICKY, + PCS1G_CFG, + PCS1G_MODE_CFG, + PCS1G_SD_CFG, + PCS1G_ANEG_CFG, + PCS1G_ANEG_NP_CFG, + PCS1G_LB_CFG, + PCS1G_DBG_CFG, + PCS1G_CDET_CFG, + PCS1G_ANEG_STATUS, + PCS1G_ANEG_NP_STATUS, + PCS1G_LINK_STATUS, + PCS1G_LINK_DOWN_CNT, + PCS1G_STICKY, + PCS1G_DEBUG_STATUS, + PCS1G_LPI_CFG, + PCS1G_LPI_WAKE_ERROR_CNT, + PCS1G_LPI_STATUS, + PCS1G_TSTPAT_MODE_CFG, + PCS1G_TSTPAT_STATUS, + DEV_PCS_FX100_CFG, + DEV_PCS_FX100_STATUS, }; enum ocelot_regfield { @@ -494,7 +534,7 @@ struct ocelot_vcap_block { struct ocelot_port { struct ocelot *ocelot; - void __iomem *regs; + struct regmap *target; bool vlan_aware; diff --git a/include/soc/mscc/ocelot_dev.h b/include/soc/mscc/ocelot_dev.h index 7c08437061fc..0c6021f02fee 100644 --- a/include/soc/mscc/ocelot_dev.h +++ b/include/soc/mscc/ocelot_dev.h @@ -8,8 +8,6 @@ #ifndef _MSCC_OCELOT_DEV_H_ #define _MSCC_OCELOT_DEV_H_ -#define DEV_CLOCK_CFG 0x0 - #define DEV_CLOCK_CFG_MAC_TX_RST BIT(7) #define DEV_CLOCK_CFG_MAC_RX_RST BIT(6) #define DEV_CLOCK_CFG_PCS_TX_RST BIT(5) @@ -19,18 +17,12 @@ #define DEV_CLOCK_CFG_LINK_SPEED(x) ((x) & GENMASK(1, 0)) #define DEV_CLOCK_CFG_LINK_SPEED_M GENMASK(1, 0) -#define DEV_PORT_MISC 0x4 - #define DEV_PORT_MISC_FWD_ERROR_ENA BIT(4) #define DEV_PORT_MISC_FWD_PAUSE_ENA BIT(3) #define DEV_PORT_MISC_FWD_CTRL_ENA BIT(2) #define DEV_PORT_MISC_DEV_LOOP_ENA BIT(1) #define DEV_PORT_MISC_HDX_FAST_DIS BIT(0) -#define DEV_EVENTS 0x8 - -#define DEV_EEE_CFG 0xc - #define DEV_EEE_CFG_EEE_ENA BIT(22) #define DEV_EEE_CFG_EEE_TIMER_AGE(x) (((x) << 15) & GENMASK(21, 15)) #define DEV_EEE_CFG_EEE_TIMER_AGE_M GENMASK(21, 15) @@ -43,33 +35,19 @@ #define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_X(x) (((x) & GENMASK(7, 1)) >> 1) #define DEV_EEE_CFG_PORT_LPI BIT(0) -#define DEV_RX_PATH_DELAY 0x10 - -#define DEV_TX_PATH_DELAY 0x14 - -#define DEV_PTP_PREDICT_CFG 0x18 - #define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG(x) (((x) << 4) & GENMASK(11, 4)) #define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_M GENMASK(11, 4) #define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_X(x) (((x) & GENMASK(11, 4)) >> 4) #define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG(x) ((x) & GENMASK(3, 0)) #define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG_M GENMASK(3, 0) -#define DEV_MAC_ENA_CFG 0x1c - #define DEV_MAC_ENA_CFG_RX_ENA BIT(4) #define DEV_MAC_ENA_CFG_TX_ENA BIT(0) -#define DEV_MAC_MODE_CFG 0x20 - #define DEV_MAC_MODE_CFG_FC_WORD_SYNC_ENA BIT(8) #define DEV_MAC_MODE_CFG_GIGA_MODE_ENA BIT(4) #define DEV_MAC_MODE_CFG_FDX_ENA BIT(0) -#define DEV_MAC_MAXLEN_CFG 0x24 - -#define DEV_MAC_TAGS_CFG 0x28 - #define DEV_MAC_TAGS_CFG_TAG_ID(x) (((x) << 16) & GENMASK(31, 16)) #define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16) #define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16) @@ -77,12 +55,8 @@ #define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1) #define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) -#define DEV_MAC_ADV_CHK_CFG 0x2c - #define DEV_MAC_ADV_CHK_CFG_LEN_DROP_ENA BIT(0) -#define DEV_MAC_IFG_CFG 0x30 - #define DEV_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17) #define DEV_MAC_IFG_CFG_REDUCED_TX_IFG BIT(16) #define DEV_MAC_IFG_CFG_TX_IFG(x) (((x) << 8) & GENMASK(12, 8)) @@ -94,8 +68,6 @@ #define DEV_MAC_IFG_CFG_RX_IFG1(x) ((x) & GENMASK(3, 0)) #define DEV_MAC_IFG_CFG_RX_IFG1_M GENMASK(3, 0) -#define DEV_MAC_HDX_CFG 0x34 - #define DEV_MAC_HDX_CFG_BYPASS_COL_SYNC BIT(26) #define DEV_MAC_HDX_CFG_OB_ENA BIT(25) #define DEV_MAC_HDX_CFG_WEXC_DIS BIT(24) @@ -107,17 +79,9 @@ #define DEV_MAC_HDX_CFG_LATE_COL_POS(x) ((x) & GENMASK(6, 0)) #define DEV_MAC_HDX_CFG_LATE_COL_POS_M GENMASK(6, 0) -#define DEV_MAC_DBG_CFG 0x38 - #define DEV_MAC_DBG_CFG_TBI_MODE BIT(4) #define DEV_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA BIT(0) -#define DEV_MAC_FC_MAC_LOW_CFG 0x3c - -#define DEV_MAC_FC_MAC_HIGH_CFG 0x40 - -#define DEV_MAC_STICKY 0x44 - #define DEV_MAC_STICKY_RX_IPG_SHRINK_STICKY BIT(9) #define DEV_MAC_STICKY_RX_PREAM_SHRINK_STICKY BIT(8) #define DEV_MAC_STICKY_RX_CARRIER_EXT_STICKY BIT(7) @@ -129,25 +93,17 @@ #define DEV_MAC_STICKY_TX_FRM_LEN_OVR_STICKY BIT(1) #define DEV_MAC_STICKY_TX_ABORT_STICKY BIT(0) -#define PCS1G_CFG 0x48 - #define PCS1G_CFG_LINK_STATUS_TYPE BIT(4) #define PCS1G_CFG_AN_LINK_CTRL_ENA BIT(1) #define PCS1G_CFG_PCS_ENA BIT(0) -#define PCS1G_MODE_CFG 0x4c - #define PCS1G_MODE_CFG_UNIDIR_MODE_ENA BIT(4) #define PCS1G_MODE_CFG_SGMII_MODE_ENA BIT(0) -#define PCS1G_SD_CFG 0x50 - #define PCS1G_SD_CFG_SD_SEL BIT(8) #define PCS1G_SD_CFG_SD_POL BIT(4) #define PCS1G_SD_CFG_SD_ENA BIT(0) -#define PCS1G_ANEG_CFG 0x54 - #define PCS1G_ANEG_CFG_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) #define PCS1G_ANEG_CFG_ADV_ABILITY_M GENMASK(31, 16) #define PCS1G_ANEG_CFG_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) @@ -155,29 +111,19 @@ #define PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT BIT(1) #define PCS1G_ANEG_CFG_ANEG_ENA BIT(0) -#define PCS1G_ANEG_NP_CFG 0x58 - #define PCS1G_ANEG_NP_CFG_NP_TX(x) (((x) << 16) & GENMASK(31, 16)) #define PCS1G_ANEG_NP_CFG_NP_TX_M GENMASK(31, 16) #define PCS1G_ANEG_NP_CFG_NP_TX_X(x) (((x) & GENMASK(31, 16)) >> 16) #define PCS1G_ANEG_NP_CFG_NP_LOADED_ONE_SHOT BIT(0) -#define PCS1G_LB_CFG 0x5c - #define PCS1G_LB_CFG_RA_ENA BIT(4) #define PCS1G_LB_CFG_GMII_PHY_LB_ENA BIT(1) #define PCS1G_LB_CFG_TBI_HOST_LB_ENA BIT(0) -#define PCS1G_DBG_CFG 0x60 - #define PCS1G_DBG_CFG_UDLT BIT(0) -#define PCS1G_CDET_CFG 0x64 - #define PCS1G_CDET_CFG_CDET_ENA BIT(0) -#define PCS1G_ANEG_STATUS 0x68 - #define PCS1G_ANEG_STATUS_LP_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) #define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_M GENMASK(31, 16) #define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) @@ -185,10 +131,6 @@ #define PCS1G_ANEG_STATUS_PAGE_RX_STICKY BIT(3) #define PCS1G_ANEG_STATUS_ANEG_COMPLETE BIT(0) -#define PCS1G_ANEG_NP_STATUS 0x6c - -#define PCS1G_LINK_STATUS 0x70 - #define PCS1G_LINK_STATUS_DELAY_VAR(x) (((x) << 12) & GENMASK(15, 12)) #define PCS1G_LINK_STATUS_DELAY_VAR_M GENMASK(15, 12) #define PCS1G_LINK_STATUS_DELAY_VAR_X(x) (((x) & GENMASK(15, 12)) >> 12) @@ -196,17 +138,9 @@ #define PCS1G_LINK_STATUS_LINK_STATUS BIT(4) #define PCS1G_LINK_STATUS_SYNC_STATUS BIT(0) -#define PCS1G_LINK_DOWN_CNT 0x74 - -#define PCS1G_STICKY 0x78 - #define PCS1G_STICKY_LINK_DOWN_STICKY BIT(4) #define PCS1G_STICKY_OUT_OF_SYNC_STICKY BIT(0) -#define PCS1G_DEBUG_STATUS 0x7c - -#define PCS1G_LPI_CFG 0x80 - #define PCS1G_LPI_CFG_QSGMII_MS_SEL BIT(20) #define PCS1G_LPI_CFG_RX_LPI_OUT_DIS BIT(17) #define PCS1G_LPI_CFG_LPI_TESTMODE BIT(16) @@ -215,10 +149,6 @@ #define PCS1G_LPI_CFG_LPI_RX_WTIM_X(x) (((x) & GENMASK(5, 4)) >> 4) #define PCS1G_LPI_CFG_TX_ASSERT_LPIDLE BIT(0) -#define PCS1G_LPI_WAKE_ERROR_CNT 0x84 - -#define PCS1G_LPI_STATUS 0x88 - #define PCS1G_LPI_STATUS_RX_LPI_FAIL BIT(16) #define PCS1G_LPI_STATUS_RX_LPI_EVENT_STICKY BIT(12) #define PCS1G_LPI_STATUS_RX_QUIET BIT(9) @@ -227,18 +157,12 @@ #define PCS1G_LPI_STATUS_TX_QUIET BIT(1) #define PCS1G_LPI_STATUS_TX_LPI_MODE BIT(0) -#define PCS1G_TSTPAT_MODE_CFG 0x8c - -#define PCS1G_TSTPAT_STATUS 0x90 - #define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT(x) (((x) << 8) & GENMASK(15, 8)) #define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_M GENMASK(15, 8) #define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_X(x) (((x) & GENMASK(15, 8)) >> 8) #define PCS1G_TSTPAT_STATUS_JTP_ERR BIT(4) #define PCS1G_TSTPAT_STATUS_JTP_LOCK BIT(0) -#define DEV_PCS_FX100_CFG 0x94 - #define DEV_PCS_FX100_CFG_SD_SEL BIT(26) #define DEV_PCS_FX100_CFG_SD_POL BIT(25) #define DEV_PCS_FX100_CFG_SD_ENA BIT(24) @@ -259,8 +183,6 @@ #define DEV_PCS_FX100_CFG_FEFGEN_ENA BIT(1) #define DEV_PCS_FX100_CFG_PCS_ENA BIT(0) -#define DEV_PCS_FX100_STATUS 0x98 - #define DEV_PCS_FX100_STATUS_EDGE_POS_PTP(x) (((x) << 8) & GENMASK(11, 8)) #define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_M GENMASK(11, 8) #define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_X(x) (((x) & GENMASK(11, 8)) >> 8) -- cgit v1.2.3 From 2789658fa319f51db43a585e076bb99a3de3c6d1 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Mon, 13 Jul 2020 19:57:02 +0300 Subject: soc: mscc: ocelot: add MII registers description Add the register definitions for the MSCC MIIM MDIO controller in preparation for seville_vsc9959.c to create its accessors for the internal MDIO bus. Since we've introduced elements to ocelot_regfields that are not instantiated by felix and ocelot, we need to define the size of the regfields arrays explicitly, otherwise ocelot_regfields_init, which iterates up to REGFIELD_MAX, will fault on the undefined regfield entries (if we're lucky). Signed-off-by: Maxim Kochetkov Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index c2a2d0165ef1..348fa26a349c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -409,6 +409,9 @@ enum ocelot_reg { PTP_CLK_CFG_ADJ_CFG, PTP_CLK_CFG_ADJ_FREQ, GCB_SOFT_RST = GCB << TARGET_OFFSET, + GCB_MIIM_MII_STATUS, + GCB_MIIM_MII_CMD, + GCB_MIIM_MII_DATA, DEV_CLOCK_CFG = DEV_GMII << TARGET_OFFSET, DEV_PORT_MISC, DEV_EVENTS, @@ -496,6 +499,8 @@ enum ocelot_regfield { SYS_RESET_CFG_MEM_ENA, SYS_RESET_CFG_MEM_INIT, GCB_SOFT_RST_SWC_RST, + GCB_MIIM_MII_STATUS_PENDING, + GCB_MIIM_MII_STATUS_BUSY, REGFIELD_MAX }; -- cgit v1.2.3 From 886e1387c73d895ad0eff53353913081983570c0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 13 Jul 2020 19:57:03 +0300 Subject: net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields Currently Felix and Ocelot share the same bit layout in these per-port registers, but Seville does not. So we need reg_fields for that. Actually since these are per-port registers, we need to also specify the number of ports, and register size per port, and use the regmap API for multiple ports. There's a more subtle point to be made about the other 2 register fields: - QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG - QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE which we are not writing any longer, for 2 reasons: - Using the previous API (ocelot_write_rix), we were only writing 1 for Felix and Ocelot, which was their hardware-default value, and which there wasn't any intention in changing. - In the case of SCH_NEXT_CFG, in fact Seville does not have this register field at all, and therefore, if we want to have common code we would be required to not write to it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 15 +++++++++++++++ include/soc/mscc/ocelot_qsys.h | 13 ------------- include/soc/mscc/ocelot_sys.h | 13 ------------- 3 files changed, 15 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 348fa26a349c..19d97585345a 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -490,11 +490,21 @@ enum ocelot_regfield { ANA_TABLES_MACACCESS_B_DOM, ANA_TABLES_MACTINDX_BUCKET, ANA_TABLES_MACTINDX_M_INDEX, + QSYS_SWITCH_PORT_MODE_PORT_ENA, + QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG, + QSYS_SWITCH_PORT_MODE_YEL_RSRVD, + QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE, + QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, + QSYS_SWITCH_PORT_MODE_TX_PFC_MODE, QSYS_TIMED_FRAME_ENTRY_TFRM_VLD, QSYS_TIMED_FRAME_ENTRY_TFRM_FP, QSYS_TIMED_FRAME_ENTRY_TFRM_PORTNO, QSYS_TIMED_FRAME_ENTRY_TFRM_TM_SEL, QSYS_TIMED_FRAME_ENTRY_TFRM_TM_T, + SYS_PORT_MODE_DATA_WO_TS, + SYS_PORT_MODE_INCL_INJ_HDR, + SYS_PORT_MODE_INCL_XTR_HDR, + SYS_PORT_MODE_INCL_HDR_ERR, SYS_RESET_CFG_CORE_ENA, SYS_RESET_CFG_MEM_ENA, SYS_RESET_CFG_MEM_INIT, @@ -638,6 +648,11 @@ struct ocelot_policer { #define ocelot_rmw_rix(ocelot, val, m, reg, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_RSZ * (ri)) #define ocelot_rmw(ocelot, val, m, reg) __ocelot_rmw_ix(ocelot, val, m, reg, 0) +#define ocelot_field_write(ocelot, reg, val) regmap_field_write((ocelot)->regfields[(reg)], (val)) +#define ocelot_field_read(ocelot, reg, val) regmap_field_read((ocelot)->regfields[(reg)], (val)) +#define ocelot_fields_write(ocelot, id, reg, val) regmap_fields_write((ocelot)->regfields[(reg)], (id), (val)) +#define ocelot_fields_read(ocelot, id, reg, val) regmap_fields_read((ocelot)->regfields[(reg)], (id), (val)) + /* I/O */ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); diff --git a/include/soc/mscc/ocelot_qsys.h b/include/soc/mscc/ocelot_qsys.h index d8c63aa761be..a814bc2017d8 100644 --- a/include/soc/mscc/ocelot_qsys.h +++ b/include/soc/mscc/ocelot_qsys.h @@ -13,19 +13,6 @@ #define QSYS_PORT_MODE_DEQUEUE_DIS BIT(1) #define QSYS_PORT_MODE_DEQUEUE_LATE BIT(0) -#define QSYS_SWITCH_PORT_MODE_RSZ 0x4 - -#define QSYS_SWITCH_PORT_MODE_PORT_ENA BIT(14) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(x) (((x) << 11) & GENMASK(13, 11)) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_M GENMASK(13, 11) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define QSYS_SWITCH_PORT_MODE_YEL_RSRVD BIT(10) -#define QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE BIT(9) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA(x) (((x) << 1) & GENMASK(8, 1)) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_M GENMASK(8, 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_X(x) (((x) & GENMASK(8, 1)) >> 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_MODE BIT(0) - #define QSYS_STAT_CNT_CFG_TX_GREEN_CNT_MODE BIT(5) #define QSYS_STAT_CNT_CFG_TX_YELLOW_CNT_MODE BIT(4) #define QSYS_STAT_CNT_CFG_DROP_GREEN_CNT_MODE BIT(3) diff --git a/include/soc/mscc/ocelot_sys.h b/include/soc/mscc/ocelot_sys.h index 16f91e172bcb..8a95fc93fde5 100644 --- a/include/soc/mscc/ocelot_sys.h +++ b/include/soc/mscc/ocelot_sys.h @@ -12,19 +12,6 @@ #define SYS_COUNT_TX_OCTETS_RSZ 0x4 -#define SYS_PORT_MODE_RSZ 0x4 - -#define SYS_PORT_MODE_DATA_WO_TS(x) (((x) << 5) & GENMASK(6, 5)) -#define SYS_PORT_MODE_DATA_WO_TS_M GENMASK(6, 5) -#define SYS_PORT_MODE_DATA_WO_TS_X(x) (((x) & GENMASK(6, 5)) >> 5) -#define SYS_PORT_MODE_INCL_INJ_HDR(x) (((x) << 3) & GENMASK(4, 3)) -#define SYS_PORT_MODE_INCL_INJ_HDR_M GENMASK(4, 3) -#define SYS_PORT_MODE_INCL_INJ_HDR_X(x) (((x) & GENMASK(4, 3)) >> 3) -#define SYS_PORT_MODE_INCL_XTR_HDR(x) (((x) << 1) & GENMASK(2, 1)) -#define SYS_PORT_MODE_INCL_XTR_HDR_M GENMASK(2, 1) -#define SYS_PORT_MODE_INCL_XTR_HDR_X(x) (((x) & GENMASK(2, 1)) >> 1) -#define SYS_PORT_MODE_INJ_HDR_ERR BIT(0) - #define SYS_FRONT_PORT_MODE_RSZ 0x4 #define SYS_FRONT_PORT_MODE_HDX_MODE BIT(0) -- cgit v1.2.3 From 67c2404922c2c3f9cc0898aafaa4e3bea2bde084 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 13 Jul 2020 19:57:04 +0300 Subject: net: dsa: felix: create a template for the DSA tags on xmit With this patch we try to kill 2 birds with 1 stone. First of all, some switches that use tag_ocelot.c don't have the exact same bitfield layout for the DSA tags. The destination ports field is different for Seville VSC9953 for example. So the choices are to either duplicate tag_ocelot.c into a new tag_seville.c (sub-optimal) or somehow take into account a supposed ocelot->dest_ports_offset when packing this field into the DSA injection header (again not ideal). Secondly, tag_ocelot.c already needs to memset a 128-bit area to zero and call some packing() functions of dubious performance in the fastpath. And most of the values it needs to pack are pretty much constant (BYPASS=1, SRC_PORT=CPU, DEST=port index). So it would be good if we could improve that. The proposed solution is to allocate a memory area per port at probe time, initialize that with the statically defined bits as per chip hardware revision, and just perform a simpler memcpy in the fastpath. Other alternatives have been analyzed, such as: - Create a separate tag_seville.c: too much code duplication for just 1 bit field difference. - Create a separate DSA_TAG_PROTO_SEVILLE under tag_ocelot.c, just like tag_brcm.c, which would have a separate .xmit function. Again, too much code duplication for just 1 bit field difference. - Allocate the template from the init function of the tag_ocelot.c module, instead of from the driver: couldn't figure out a method of accessing the correct port template corresponding to the correct tagger in the .xmit function. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 19d97585345a..6cfbace57770 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -564,6 +564,8 @@ struct ocelot_port { u8 ts_id; phy_interface_t phy_mode; + + u8 *xmit_template; }; struct ocelot { -- cgit v1.2.3 From 541132f0961a4f17b02974902085af964adf5966 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Mon, 13 Jul 2020 19:57:07 +0300 Subject: net: mscc: ocelot: convert SYS_PAUSE_CFG register access to regfield Seville has a different bitwise layout than Ocelot and Felix. Signed-off-by: Maxim Kochetkov Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 3 +++ include/soc/mscc/ocelot_sys.h | 10 ---------- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6cfbace57770..71bb92bcfdf7 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -511,6 +511,9 @@ enum ocelot_regfield { GCB_SOFT_RST_SWC_RST, GCB_MIIM_MII_STATUS_PENDING, GCB_MIIM_MII_STATUS_BUSY, + SYS_PAUSE_CFG_PAUSE_START, + SYS_PAUSE_CFG_PAUSE_STOP, + SYS_PAUSE_CFG_PAUSE_ENA, REGFIELD_MAX }; diff --git a/include/soc/mscc/ocelot_sys.h b/include/soc/mscc/ocelot_sys.h index 8a95fc93fde5..79cf40ccdbe6 100644 --- a/include/soc/mscc/ocelot_sys.h +++ b/include/soc/mscc/ocelot_sys.h @@ -43,16 +43,6 @@ #define SYS_TIMESTAMP_OFFSET_TIMESTAMP_OFFSET(x) ((x) & GENMASK(5, 0)) #define SYS_TIMESTAMP_OFFSET_TIMESTAMP_OFFSET_M GENMASK(5, 0) -#define SYS_PAUSE_CFG_RSZ 0x4 - -#define SYS_PAUSE_CFG_PAUSE_START(x) (((x) << 10) & GENMASK(18, 10)) -#define SYS_PAUSE_CFG_PAUSE_START_M GENMASK(18, 10) -#define SYS_PAUSE_CFG_PAUSE_START_X(x) (((x) & GENMASK(18, 10)) >> 10) -#define SYS_PAUSE_CFG_PAUSE_STOP(x) (((x) << 1) & GENMASK(9, 1)) -#define SYS_PAUSE_CFG_PAUSE_STOP_M GENMASK(9, 1) -#define SYS_PAUSE_CFG_PAUSE_STOP_X(x) (((x) & GENMASK(9, 1)) >> 1) -#define SYS_PAUSE_CFG_PAUSE_ENA BIT(0) - #define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START(x) (((x) << 9) & GENMASK(17, 9)) #define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START_M GENMASK(17, 9) #define SYS_PAUSE_TOT_CFG_PAUSE_TOT_START_X(x) (((x) & GENMASK(17, 9)) >> 9) -- cgit v1.2.3 From aa92d836d5c40a7e21e563a272ad177f1bfd44dd Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Mon, 13 Jul 2020 19:57:08 +0300 Subject: net: mscc: ocelot: extend watermark encoding function The ocelot_wm_encode function deals with setting thresholds for pause frame start and stop. In Ocelot and Felix the register layout is the same, but for Seville, it isn't. The easiest way to accommodate Seville hardware configuration is to introduce a function pointer for setting this up. Signed-off-by: Maxim Kochetkov Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 71bb92bcfdf7..da369b12005f 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -541,6 +541,7 @@ struct ocelot; struct ocelot_ops { int (*reset)(struct ocelot *ocelot); + u16 (*wm_enc)(u16 value); }; struct ocelot_vcap_block { -- cgit v1.2.3 From 079ef53673f2e3b3ee1728800311f20f28eed4f7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Jul 2020 12:25:33 +0200 Subject: bpf: Fix build for disabled CONFIG_DEBUG_INFO_BTF option Stephen reported following linker warnings on powerpc build: ld: warning: orphan section `.BTF_ids' from `kernel/trace/bpf_trace.o' being placed in section `.BTF_ids' ld: warning: orphan section `.BTF_ids' from `kernel/bpf/btf.o' being placed in section `.BTF_ids' ld: warning: orphan section `.BTF_ids' from `kernel/bpf/stackmap.o' being placed in section `.BTF_ids' ld: warning: orphan section `.BTF_ids' from `net/core/filter.o' being placed in section `.BTF_ids' ld: warning: orphan section `.BTF_ids' from `kernel/trace/bpf_trace.o' being placed in section `.BTF_ids' It's because we generated .BTF_ids section even when CONFIG_DEBUG_INFO_BTF is not enabled. Fixing this by generating empty btf_id arrays for this case. Reported-by: Stephen Rothwell Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/bpf/20200714102534.299280-1-jolsa@kernel.org --- include/linux/btf_ids.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index fe019774f8a7..b3c73db9587c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -3,6 +3,8 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#ifdef CONFIG_DEBUG_INFO_BTF + #include /* for __PASTE */ /* @@ -83,5 +85,12 @@ asm( \ ".zero 4 \n" \ ".popsection; \n"); +#else + +#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID(prefix, name) +#define BTF_ID_UNUSED + +#endif /* CONFIG_DEBUG_INFO_BTF */ #endif -- cgit v1.2.3 From 11bb2f7a45909f4f64afe471875672ae1b84a380 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Jul 2020 12:25:34 +0200 Subject: bpf: Fix cross build for CONFIG_DEBUG_INFO_BTF option Stephen and 0-DAY CI Kernel Test Service reported broken cross build for arm (arm-linux-gnueabi-gcc (GCC) 9.3.0), with following output: /tmp/ccMS5uth.s: Assembler messages: /tmp/ccMS5uth.s:69: Error: unrecognized symbol type "" /tmp/ccMS5uth.s:82: Error: unrecognized symbol type "" Having '@object' for .type diretive is wrong because '@' is comment character for some architectures. Using STT_OBJECT instead that should work everywhere. Also using HOST* variables to build resolve_btfids so it's properly build in crossbuilds (stolen from objtool's Makefile). Reported-by: kernel test robot Reported-by: Stephen Rothwell Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/bpf/20200714102534.299280-2-jolsa@kernel.org --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index b3c73db9587c..1cdb56950ffe 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -23,7 +23,7 @@ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ ".local " #symbol " ; \n" \ -".type " #symbol ", @object; \n" \ +".type " #symbol ", STT_OBJECT; \n" \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ ".zero 4 \n" \ -- cgit v1.2.3 From cf7c52748f64606f5f9111e7cbdb2ffb281a60af Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:47 +0200 Subject: switchdev: mrp: Extend switchdev API for MRP Interconnect Extend switchdev API to add support for MRP interconnect. The HW is notified in the following cases: SWITCHDEV_OBJ_ID_IN_ROLE_MRP: This is used when the interconnect role of the node changes. The supported roles are MIM and MIC. SWITCHDEV_OBJ_ID_IN_STATE_MRP: This is used when the interconnect ring changes it states to open or closed. SWITCHDEV_OBJ_ID_IN_TEST_MRP: This is used to start/stop sending MRP_InTest frames on all MRP ports. This is called only on nodes that have the interconnect role MIM. Signed-off-by: Horatiu Vultur Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/switchdev.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index b8c059b4e06d..ff2246914301 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -76,6 +76,10 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_RING_TEST_MRP, SWITCHDEV_OBJ_ID_RING_ROLE_MRP, SWITCHDEV_OBJ_ID_RING_STATE_MRP, + SWITCHDEV_OBJ_ID_IN_TEST_MRP, + SWITCHDEV_OBJ_ID_IN_ROLE_MRP, + SWITCHDEV_OBJ_ID_IN_STATE_MRP, + #endif }; @@ -155,6 +159,40 @@ struct switchdev_obj_ring_state_mrp { #define SWITCHDEV_OBJ_RING_STATE_MRP(OBJ) \ container_of((OBJ), struct switchdev_obj_ring_state_mrp, obj) +/* SWITCHDEV_OBJ_ID_IN_TEST_MRP */ +struct switchdev_obj_in_test_mrp { + struct switchdev_obj obj; + /* The value is in us and a value of 0 represents to stop */ + u32 interval; + u32 in_id; + u32 period; + u8 max_miss; +}; + +#define SWITCHDEV_OBJ_IN_TEST_MRP(OBJ) \ + container_of((OBJ), struct switchdev_obj_in_test_mrp, obj) + +/* SWICHDEV_OBJ_ID_IN_ROLE_MRP */ +struct switchdev_obj_in_role_mrp { + struct switchdev_obj obj; + struct net_device *i_port; + u32 ring_id; + u16 in_id; + u8 in_role; +}; + +#define SWITCHDEV_OBJ_IN_ROLE_MRP(OBJ) \ + container_of((OBJ), struct switchdev_obj_in_role_mrp, obj) + +struct switchdev_obj_in_state_mrp { + struct switchdev_obj obj; + u32 in_id; + u8 in_state; +}; + +#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \ + container_of((OBJ), struct switchdev_obj_in_state_mrp, obj) + #endif typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); -- cgit v1.2.3 From 2801758391ba6b0c20e253b956355e1b15ad85a2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:48 +0200 Subject: bridge: uapi: mrp: Extend MRP attributes for MRP interconnect Extend the existing MRP netlink attributes to allow to configure MRP Interconnect: IFLA_BRIDGE_MRP_IN_ROLE - the parameter type is br_mrp_in_role which contains the interconnect id, the ring id, the interconnect role(MIM or MIC) and the port ifindex that represents the interconnect port. IFLA_BRIDGE_MRP_IN_STATE - the parameter type is br_mrp_in_state which contains the interconnect id and the interconnect state. IFLA_BRIDGE_MRP_IN_TEST - the parameter type is br_mrp_start_in_test which contains the interconnect id, the interval at which to send MRP_InTest frames, how many test frames can be missed before declaring the interconnect ring open and the period which represents for how long to send MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 53 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/mrp_bridge.h | 38 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c114c1c2bd53..d840a3e37a37 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -167,6 +167,9 @@ enum { IFLA_BRIDGE_MRP_RING_ROLE, IFLA_BRIDGE_MRP_START_TEST, IFLA_BRIDGE_MRP_INFO, + IFLA_BRIDGE_MRP_IN_ROLE, + IFLA_BRIDGE_MRP_IN_STATE, + IFLA_BRIDGE_MRP_START_IN_TEST, __IFLA_BRIDGE_MRP_MAX, }; @@ -245,6 +248,37 @@ enum { #define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) +enum { + IFLA_BRIDGE_MRP_IN_STATE_UNSPEC, + IFLA_BRIDGE_MRP_IN_STATE_IN_ID, + IFLA_BRIDGE_MRP_IN_STATE_STATE, + __IFLA_BRIDGE_MRP_IN_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_IN_ROLE_RING_ID, + IFLA_BRIDGE_MRP_IN_ROLE_IN_ID, + IFLA_BRIDGE_MRP_IN_ROLE_ROLE, + IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX, + __IFLA_BRIDGE_MRP_IN_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID, + IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD, + __IFLA_BRIDGE_MRP_START_IN_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; @@ -270,6 +304,25 @@ struct br_mrp_start_test { __u32 monitor; }; +struct br_mrp_in_state { + __u32 in_state; + __u16 in_id; +}; + +struct br_mrp_in_role { + __u32 ring_id; + __u32 in_role; + __u32 i_ifindex; + __u16 in_id; +}; + +struct br_mrp_start_in_test { + __u32 interval; + __u32 max_miss; + __u32 period; + __u16 in_id; +}; + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index bee366540212..6aeb13ef0b1e 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -21,11 +21,22 @@ enum br_mrp_ring_role_type { BR_MRP_RING_ROLE_MRA, }; +enum br_mrp_in_role_type { + BR_MRP_IN_ROLE_DISABLED, + BR_MRP_IN_ROLE_MIC, + BR_MRP_IN_ROLE_MIM, +}; + enum br_mrp_ring_state_type { BR_MRP_RING_STATE_OPEN, BR_MRP_RING_STATE_CLOSED, }; +enum br_mrp_in_state_type { + BR_MRP_IN_STATE_OPEN, + BR_MRP_IN_STATE_CLOSED, +}; + enum br_mrp_port_state_type { BR_MRP_PORT_STATE_DISABLED, BR_MRP_PORT_STATE_BLOCKED, @@ -36,6 +47,7 @@ enum br_mrp_port_state_type { enum br_mrp_port_role_type { BR_MRP_PORT_ROLE_PRIMARY, BR_MRP_PORT_ROLE_SECONDARY, + BR_MRP_PORT_ROLE_INTER, }; enum br_mrp_tlv_header_type { @@ -45,6 +57,10 @@ enum br_mrp_tlv_header_type { BR_MRP_TLV_HEADER_RING_TOPO = 0x3, BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4, BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5, + BR_MRP_TLV_HEADER_IN_TEST = 0x6, + BR_MRP_TLV_HEADER_IN_TOPO = 0x7, + BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8, + BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9, BR_MRP_TLV_HEADER_OPTION = 0x7f, }; @@ -118,4 +134,26 @@ struct br_mrp_oui_hdr { __u8 oui[MRP_OUI_LENGTH]; }; +struct br_mrp_in_test_hdr { + __be16 id; + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 state; + __be16 transitions; + __be32 timestamp; +}; + +struct br_mrp_in_topo_hdr { + __u8 sa[ETH_ALEN]; + __be16 id; + __be16 interval; +}; + +struct br_mrp_in_link_hdr { + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 id; + __be16 interval; +}; + #endif -- cgit v1.2.3 From 43364ef1a12a4236b7956b076649ddd080764cd1 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:49 +0200 Subject: bridge: mrp: Extend bridge interface This patch adds a new flag(BR_MRP_LOST_IN_CONT) to the net bridge ports. This bit will be set when the port lost the continuity of MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b3a8d3054af0..6479a38e52fa 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -49,6 +49,7 @@ struct br_ip_list { #define BR_ISOLATED BIT(16) #define BR_MRP_AWARE BIT(17) #define BR_MRP_LOST_CONT BIT(18) +#define BR_MRP_LOST_IN_CONT BIT(19) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From 559139cb0405d38816e5e725adee9000db993235 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:56 +0200 Subject: bridge: uapi: mrp: Extend MRP_INFO attributes for interconnect status Extend the existing MRP_INFO to return status of MRP interconnect. In case there is no MRP interconnect on the node then the role will be disabled so the other attributes can be ignored. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d840a3e37a37..c1227aecd38f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -243,6 +243,11 @@ enum { IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + IFLA_BRIDGE_MRP_INFO_IN_STATE, + IFLA_BRIDGE_MRP_INFO_IN_ROLE, + IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, __IFLA_BRIDGE_MRP_INFO_MAX, }; -- cgit v1.2.3 From ffb3adba64801f70c472303c9e386eb5eaec193d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:58 +0200 Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which allows to notify the userspace when the node lost the contiuity of MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc185a007ade..26842ffd0501 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -344,6 +344,7 @@ enum { IFLA_BRPORT_ISOLATED, IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, + IFLA_BRPORT_MRP_IN_OPEN, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From 17c7b8b1cadc0ae95ee2a9fa18022dd0c901af5f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 15 Jul 2020 10:18:46 +0800 Subject: cipso: Remove unused inline functions They are not used any more since commit b1edeb102397 ("netlabel: Replace protocol/NetLabel linking with refrerence counts") Signed-off-by: YueHaibing Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 428b6725b248..53dd7d988a2d 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -150,18 +150,6 @@ static inline int cipso_v4_doi_walk(u32 *skip_cnt, { return 0; } - -static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, - const char *domain) -{ - return -ENOSYS; -} - -static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - return 0; -} #endif /* CONFIG_NETLABEL */ /* -- cgit v1.2.3 From 054848d21bc19992f2c3540244bd6defbc833aa2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 15 Jul 2020 10:31:19 +0800 Subject: net: flow: Remove unused inline function It is not used since commit 09c7570480f7 ("xfrm: remove flow cache") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/net/flow.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index a50fb77a0b27..929d3ca614d0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -204,24 +204,6 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) return container_of(fldn, struct flowi, u.dn); } -typedef unsigned long flow_compare_t; - -static inline unsigned int flow_key_size(u16 family) -{ - switch (family) { - case AF_INET: - BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t)); - return sizeof(struct flowi4) / sizeof(flow_compare_t); - case AF_INET6: - BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t)); - return sizeof(struct flowi6) / sizeof(flow_compare_t); - case AF_DECnet: - BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t)); - return sizeof(struct flowidn) / sizeof(flow_compare_t); - } - return 0; -} - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); #endif -- cgit v1.2.3 From 0d40efabe3e383dd7e347debd85a0d4754402efb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 15 Jul 2020 10:36:13 +0800 Subject: mptcp: Remove unused inline function mptcp_rcv_synsent() commit 263e1201a2c3 ("mptcp: consolidate synack processing.") left behind this, remove it. Signed-off-by: YueHaibing Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/mptcp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 46d0487d2b22..02158c257bd4 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -164,10 +164,6 @@ static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, return false; } -static inline void mptcp_rcv_synsent(struct sock *sk) -{ -} - static inline bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, struct mptcp_out_options *opts) -- cgit v1.2.3 From 8635764bcf0f109933b593d79ac2247b1e863d0a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 15 Jul 2020 10:52:03 +0800 Subject: netpoll: Remove unused inline function netpoll_netdev_init() commit d565b0a1a9b6 ("net: Add Generic Receive Offload infrastructure") left behind this, remove it. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/netpoll.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index f47af135bd56..e6a2d72e0dc7 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -102,9 +102,6 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) static inline void netpoll_poll_unlock(void *have) { } -static inline void netpoll_netdev_init(struct net_device *dev) -{ -} static inline bool netpoll_tx_running(struct net_device *dev) { return false; -- cgit v1.2.3 From 0d80b76184ac9f2dfb939e39ad1f961fc006b99d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:38 -0700 Subject: net: qed: drop duplicate words in comments Drop doubled word "the" in two comments. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/linux/qed/qed_chain.h | 2 +- include/linux/qed/qed_if.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 92cdc79e5019..7071dc92b4e2 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -130,7 +130,7 @@ struct qed_chain { } pbl_sp; /* Address of first page of the chain - the address is required - * for fastpath operation [consume/produce] but only for the the SINGLE + * for fastpath operation [consume/produce] but only for the SINGLE * flavour which isn't considered fastpath [== SPQ]. */ void *p_virt_addr; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 90e1060da02b..8a6e3ad436d1 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -498,7 +498,7 @@ struct qed_fcoe_pf_params { u8 bdq_pbl_num_entries[2]; }; -/* Most of the the parameters below are described in the FW iSCSI / TCP HSI */ +/* Most of the parameters below are described in the FW iSCSI / TCP HSI */ struct qed_iscsi_pf_params { u64 glbl_q_params_addr; u64 bdq_pbl_base_addr[3]; -- cgit v1.2.3 From 2ff17117e60572d6974c766d6b1a225ec7d68795 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:39 -0700 Subject: net: skbuff.h: drop duplicate words in comments Drop doubled words in several comments. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c0377fc00c2..6a82d4a8229e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1328,7 +1328,7 @@ void skb_flow_dissect_meta(const struct sk_buff *skb, void *target_container); /* Gets a skb connection tracking info, ctinfo map should be a - * a map of mapsize to translate enum ip_conntrack_info states + * map of mapsize to translate enum ip_conntrack_info states * to user states. */ void @@ -3812,7 +3812,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) * must call this function to return the skb back to the stack with a * timestamp. * - * @skb: clone of the the original outgoing packet + * @skb: clone of the original outgoing packet * @hwtstamps: hardware time stamps * */ -- cgit v1.2.3 From 158e89639166461e225a855bce6e3ee6cd8bb1c0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:40 -0700 Subject: net: wimax: fix duplicate words in comments Drop doubled words in two comments. Fix a spello/typo. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/linux/wimax/debug.h | 4 ++-- include/net/wimax.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h index 4dd2c1cea6a9..cdae052bcdcd 100644 --- a/include/linux/wimax/debug.h +++ b/include/linux/wimax/debug.h @@ -184,8 +184,8 @@ do { \ /* - * CPP sintatic sugar to generate A_B like symbol names when one of - * the arguments is a a preprocessor #define. + * CPP syntactic sugar to generate A_B like symbol names when one of + * the arguments is a preprocessor #define. */ #define __D_PASTE__(varname, modulename) varname##_##modulename #define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) diff --git a/include/net/wimax.h b/include/net/wimax.h index 24ba7e89c26c..f6e31d2f47aa 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -28,7 +28,7 @@ * * USAGE * - * Embed a `struct wimax_dev` at the beginning of the the device's + * Embed a `struct wimax_dev` at the beginning of the device's * private structure, initialize and register it. For details, see * `struct wimax_dev`s documentation. * -- cgit v1.2.3 From cee50c2a028432dadacdf55950c6c6a7875e8172 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:41 -0700 Subject: net: 9p: drop duplicate word in comment Drop doubled word "not" in a comment. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/net/9p/transport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 98a2be2de04a..3eb4261b2958 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -25,7 +25,7 @@ * @request: member function to issue a request to the transport * @cancel: member function to cancel a request (if it hasn't been sent) * @cancelled: member function to notify that a cancelled request will not - * not receive a reply + * receive a reply * * This is the basic API for a transport module which is registered by the * transport module with the 9P core network module and used by the client -- cgit v1.2.3 From c201324b54553aebb32845193680f21eb493c6e5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:42 -0700 Subject: net: caif: drop duplicate words in comments Drop doubled words "or" and "the" in several comments. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/net/caif/caif_layer.h | 4 ++-- include/uapi/linux/caif/caif_socket.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 064094101cb5..51f7bb42a936 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -156,7 +156,7 @@ struct cflayer { * CAIF packets upwards in the stack. * Packet handling rules: * - The CAIF packet (cfpkt) ownership is passed to the - * called receive function. This means that the the + * called receive function. This means that the * packet cannot be accessed after passing it to the * above layer using up->receive(). * @@ -184,7 +184,7 @@ struct cflayer { * CAIF packet downwards in the stack. * Packet handling rules: * - The CAIF packet (cfpkt) ownership is passed to the - * transmit function. This means that the the packet + * transmit function. This means that the packet * cannot be accessed after passing it to the below * layer using dn->transmit(). * diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h index 10ec1d1cf68e..d9970bbaa156 100644 --- a/include/uapi/linux/caif/caif_socket.h +++ b/include/uapi/linux/caif/caif_socket.h @@ -169,7 +169,7 @@ struct sockaddr_caif { * @CAIFSO_LINK_SELECT: Selector used if multiple CAIF Link layers are * available. Either a high bandwidth * link can be selected (CAIF_LINK_HIGH_BANDW) or - * or a low latency link (CAIF_LINK_LOW_LATENCY). + * a low latency link (CAIF_LINK_LOW_LATENCY). * This option is of type __u32. * Alternatively SO_BINDTODEVICE can be used. * -- cgit v1.2.3 From ab88d64a90951a95c4e08971b02fcb781d2067f0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:43 -0700 Subject: net: dsa.h: drop duplicate word in comment Drop doubled word "to" in a comment. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b28c95c76762..6fa418ff1175 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -612,7 +612,7 @@ struct dsa_switch_ops { * MTU change functionality. Switches can also adjust their MRU through * this method. By MTU, one understands the SDU (L2 payload) length. * If the switch needs to account for the DSA tag on the CPU port, this - * method needs to to do so privately. + * method needs to do so privately. */ int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); -- cgit v1.2.3 From 4b48b0a3aa0df49eb6d80a86c2b016bf6c3eebf9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:44 -0700 Subject: net: ip6_fib.h: drop duplicate word in comment Drop doubled word "the" in a comment. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cc8356fd927f..ac5ff3c3afb1 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -166,7 +166,7 @@ struct fib6_info { struct fib6_node __rcu *fib6_node; /* Multipath routes: - * siblings is a list of fib6_info that have the the same metric/weight, + * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ -- cgit v1.2.3 From d86f9868bdb40fc11c1e8c176ae11fb897b9d5f4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:45 -0700 Subject: net: sctp: drop duplicate words in comments Drop doubled words in several comments. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 2 +- include/net/sctp/structs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e3bd198b00ae..4fc747b778eb 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -291,7 +291,7 @@ atomic_dec(&sctp_dbg_objcnt_## name) #define SCTP_DBG_OBJCNT(name) \ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0) -/* Macro to help create new entries in in the global array of +/* Macro to help create new entries in the global array of * objcnt counters. */ #define SCTP_DBG_OBJCNT_ENTRY(name) \ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fb42c90348d3..9bbb2f60db92 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1398,7 +1398,7 @@ struct sctp_stream_priorities { struct list_head prio_sched; /* List of streams scheduled */ struct list_head active; - /* The next stream stream in line */ + /* The next stream in line */ struct sctp_stream_out_ext *next; __u16 prio; }; @@ -1460,7 +1460,7 @@ struct sctp_stream { struct { /* List of streams scheduled */ struct list_head rr_list; - /* The next stream stream in line */ + /* The next stream in line */ struct sctp_stream_out_ext *rr_next; }; }; @@ -1770,7 +1770,7 @@ struct sctp_association { int max_burst; /* This is the max_retrans value for the association. This value will - * be initialized initialized from system defaults, but can be + * be initialized from system defaults, but can be * modified by the SCTP_ASSOCINFO socket option. */ int max_retrans; -- cgit v1.2.3 From 59632b220f2d61df274ed3a14a204e941051fdad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:42:46 -0700 Subject: net: ipv6: drop duplicate word in comment Drop the doubled word "by" in a comment. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2cb445a8fc9e..8d8f877e7f81 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -223,7 +223,7 @@ struct ipv6_pinfo { /* * Packed in 16bits. - * Omit one shift by by putting the signed field at MSB. + * Omit one shift by putting the signed field at MSB. */ #if defined(__BIG_ENDIAN_BITFIELD) __s16 hop_limit:9; -- cgit v1.2.3 From 339ddaa626995bc6218972ca241471f3717cc5f4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Jul 2020 19:43:33 +0200 Subject: Bluetooth: Fix update of connection state in `hci_encrypt_cfm` Starting with the upgrade to v5.8-rc3, I've noticed I wasn't able to connect to my Bluetooth headset properly anymore. While connecting to the device would eventually succeed, bluetoothd seemed to be confused about the current connection state where the state was flapping hence and forth. Bisecting this issue led to commit 3ca44c16b0dc (Bluetooth: Consolidate encryption handling in hci_encrypt_cfm, 2020-05-19), which refactored `hci_encrypt_cfm` to also handle updating the connection state. The commit in question changed the code to call `hci_connect_cfm` inside `hci_encrypt_cfm` and to change the connection state. But with the conversion, we now only update the connection state if a status was set already. In fact, the reverse should be true: the status should be updated if no status is yet set. So let's fix the isuse by reversing the condition. Fixes: 3ca44c16b0dc ("Bluetooth: Consolidate encryption handling in hci_encrypt_cfm") Signed-off-by: Patrick Steinhardt Acked-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 34ad5b207598..bee1b4778ccc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1467,7 +1467,7 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) __u8 encrypt; if (conn->state == BT_CONFIG) { - if (status) + if (!status) conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); -- cgit v1.2.3 From daa5cdc3fd08407048538585b2433601d4089a82 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 14 Jul 2020 15:56:35 +0200 Subject: net: Refactor xdp_convert_buff_to_frame Move the guts of xdp_convert_buff_to_frame to a new helper, xdp_update_frame_from_buff so it can be reused removing code duplication Suggested-by: Jesper Dangaard Brouer Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: David Ahern Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/90a68c283d7ebeb48924934c9b7ac79492300472.1594734381.git.lorenzo@kernel.org --- include/net/xdp.h | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 609f819ed08b..5b383c450858 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -121,39 +121,48 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->frame_sz = frame->frame_sz; } -/* Convert xdp_buff to xdp_frame */ static inline -struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) +int xdp_update_frame_from_buff(struct xdp_buff *xdp, + struct xdp_frame *xdp_frame) { - struct xdp_frame *xdp_frame; - int metasize; - int headroom; - - if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) - return xdp_convert_zc_to_xdp_frame(xdp); + int metasize, headroom; /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; metasize = xdp->data - xdp->data_meta; metasize = metasize > 0 ? metasize : 0; if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) - return NULL; + return -ENOSPC; /* Catch if driver didn't reserve tailroom for skb_shared_info */ if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { XDP_WARN("Driver BUG: missing reserved tailroom"); - return NULL; + return -ENOSPC; } - /* Store info in top of packet */ - xdp_frame = xdp->data_hard_start; - xdp_frame->data = xdp->data; xdp_frame->len = xdp->data_end - xdp->data; xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + return 0; +} + +/* Convert xdp_buff to xdp_frame */ +static inline +struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) +{ + struct xdp_frame *xdp_frame; + + if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) + return xdp_convert_zc_to_xdp_frame(xdp); + + /* Store info in top of packet */ + xdp_frame = xdp->data_hard_start; + if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0)) + return NULL; + /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ xdp_frame->mem = xdp->rxq->mem; -- cgit v1.2.3 From 644bfe51fa49c22244d24e896cd3fe3ee2f2cfd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:37 +0200 Subject: cpumap: Formalize map value as a named struct As it has been already done for devmap, introduce 'struct bpf_cpumap_val' to formalize the expected values that can be passed in for a CPUMAP. Update cpumap code to use the struct. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/754f950674665dae6139c061d28c1d982aaf4170.1594734381.git.lorenzo@kernel.org --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5e386389913a..109623527358 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3849,6 +3849,15 @@ struct bpf_devmap_val { } bpf_prog; }; +/* CPUMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_cpumap_val { + __u32 qsize; /* queue size to remote target CPU */ +}; + enum sk_action { SK_DROP = 0, SK_PASS, -- cgit v1.2.3 From 9216477449f33cdbc9c9a99d49f500b7fbb81702 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:38 +0200 Subject: bpf: cpumap: Add the possibility to attach an eBPF program to cpumap Introduce the capability to attach an eBPF program to cpumap entries. The idea behind this feature is to add the possibility to define on which CPU run the eBPF program if the underlying hw does not support RSS. Current supported verdicts are XDP_DROP and XDP_PASS. This patch has been tested on Marvell ESPRESSObin using xdp_redirect_cpu sample available in the kernel tree to identify possible performance regressions. Results show there are no observable differences in packet-per-second: $./xdp_redirect_cpu --progname xdp_cpu_map0 --dev eth0 --cpu 1 rx: 354.8 Kpps rx: 356.0 Kpps rx: 356.8 Kpps rx: 356.3 Kpps rx: 356.6 Kpps rx: 356.6 Kpps rx: 356.7 Kpps rx: 355.8 Kpps rx: 356.8 Kpps rx: 356.8 Kpps Co-developed-by: Jesper Dangaard Brouer Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/5c9febdf903d810b3415732e5cd98491d7d9067a.1594734381.git.lorenzo@kernel.org --- include/linux/bpf.h | 6 ++++++ include/net/xdp.h | 5 +++++ include/trace/events/xdp.h | 14 ++++++++++---- include/uapi/linux/bpf.h | 5 +++++ 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c67c88ad35f8..54ad426dbea1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1272,6 +1272,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); +bool cpu_map_prog_allowed(struct bpf_map *map); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) @@ -1432,6 +1433,11 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, return 0; } +static inline bool cpu_map_prog_allowed(struct bpf_map *map) +{ + return false; +} + static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { diff --git a/include/net/xdp.h b/include/net/xdp.h index 5b383c450858..83b9e0142b52 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -98,6 +98,11 @@ struct xdp_frame { struct net_device *dev_rx; /* used by cpumap */ }; +struct xdp_cpumap_stats { + unsigned int pass; + unsigned int drop; +}; + /* Clear kernel pointers in xdp_frame */ static inline void xdp_scrub_frame(struct xdp_frame *frame) { diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index b73d3e141323..e2c99f5bee39 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -177,9 +177,9 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TRACE_EVENT(xdp_cpumap_kthread, TP_PROTO(int map_id, unsigned int processed, unsigned int drops, - int sched), + int sched, struct xdp_cpumap_stats *xdp_stats), - TP_ARGS(map_id, processed, drops, sched), + TP_ARGS(map_id, processed, drops, sched, xdp_stats), TP_STRUCT__entry( __field(int, map_id) @@ -188,6 +188,8 @@ TRACE_EVENT(xdp_cpumap_kthread, __field(unsigned int, drops) __field(unsigned int, processed) __field(int, sched) + __field(unsigned int, xdp_pass) + __field(unsigned int, xdp_drop) ), TP_fast_assign( @@ -197,16 +199,20 @@ TRACE_EVENT(xdp_cpumap_kthread, __entry->drops = drops; __entry->processed = processed; __entry->sched = sched; + __entry->xdp_pass = xdp_stats->pass; + __entry->xdp_drop = xdp_stats->drop; ), TP_printk("kthread" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" - " sched=%d", + " sched=%d" + " xdp_pass=%u xdp_drop=%u", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, - __entry->sched) + __entry->sched, + __entry->xdp_pass, __entry->xdp_drop) ); TRACE_EVENT(xdp_cpumap_enqueue, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 109623527358..c010b57fce3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -227,6 +227,7 @@ enum bpf_attach_type { BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, BPF_CGROUP_INET_SOCK_RELEASE, + BPF_XDP_CPUMAP, __MAX_BPF_ATTACH_TYPE }; @@ -3856,6 +3857,10 @@ struct bpf_devmap_val { */ struct bpf_cpumap_val { __u32 qsize; /* queue size to remote target CPU */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; }; enum sk_action { -- cgit v1.2.3 From 28b1520ebf81ced970141640d90279ac7b9f1f9a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:39 +0200 Subject: bpf: cpumap: Implement XDP_REDIRECT for eBPF programs attached to map entries Introduce XDP_REDIRECT support for eBPF programs attached to cpumap entries. This patch has been tested on Marvell ESPRESSObin using a modified version of xdp_redirect_cpu sample in order to attach a XDP program to CPUMAP entries to perform a redirect on the mvneta interface. In particular the following scenario has been tested: rq (cpu0) --> mvneta - XDP_REDIRECT (cpu0) --> CPUMAP - XDP_REDIRECT (cpu1) --> mvneta $./xdp_redirect_cpu -p xdp_cpu_map0 -d eth0 -c 1 -e xdp_redirect \ -f xdp_redirect_kern.o -m tx_port -r eth0 tx: 285.2 Kpps rx: 285.2 Kpps Attaching a simple XDP program on eth0 to perform XDP_TX gives comparable results: tx: 288.4 Kpps rx: 288.4 Kpps Co-developed-by: Jesper Dangaard Brouer Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/2cf8373a731867af302b00c4ff16c122630c4980.1594734381.git.lorenzo@kernel.org --- include/net/xdp.h | 1 + include/trace/events/xdp.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 83b9e0142b52..5be0d4d65b94 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -99,6 +99,7 @@ struct xdp_frame { }; struct xdp_cpumap_stats { + unsigned int redirect; unsigned int pass; unsigned int drop; }; diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index e2c99f5bee39..cd24e8a59529 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -190,6 +190,7 @@ TRACE_EVENT(xdp_cpumap_kthread, __field(int, sched) __field(unsigned int, xdp_pass) __field(unsigned int, xdp_drop) + __field(unsigned int, xdp_redirect) ), TP_fast_assign( @@ -201,18 +202,19 @@ TRACE_EVENT(xdp_cpumap_kthread, __entry->sched = sched; __entry->xdp_pass = xdp_stats->pass; __entry->xdp_drop = xdp_stats->drop; + __entry->xdp_redirect = xdp_stats->redirect; ), TP_printk("kthread" " cpu=%d map_id=%d action=%s" " processed=%u drops=%u" " sched=%d" - " xdp_pass=%u xdp_drop=%u", + " xdp_pass=%u xdp_drop=%u xdp_redirect=%u", __entry->cpu, __entry->map_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->processed, __entry->drops, __entry->sched, - __entry->xdp_pass, __entry->xdp_drop) + __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) ); TRACE_EVENT(xdp_cpumap_enqueue, -- cgit v1.2.3 From bfdfa51702dec67e9fcd52568b4cf3c7f799db8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:29:11 -0700 Subject: bpf: Drop duplicated words in uapi helper comments Drop doubled words "will" and "attach". Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/6b9f71ae-4f8e-0259-2c5d-187ddaefe6eb@infradead.org --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c010b57fce3f..7ac3992dacfe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2420,7 +2420,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2457,7 +2457,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -4000,7 +4000,7 @@ struct bpf_link_info { /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on - * attach attach type). + * attach type). */ struct bpf_sock_addr { __u32 user_family; /* Allows 4-byte read, but no write. */ -- cgit v1.2.3 From 9a6ad1ad71fbc5a52617e016a3608d71b91f62e8 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 18 Nov 2019 14:30:20 +0200 Subject: net/mlx5: Accel, Add core IPsec support for the Connect-X family This to set the base for downstream patches to support the new IPsec implementation of the Connect-X family. Following modifications made: - Remove accel layer dependency from MLX5_FPGA_IPSEC. - Introduce accel_ipsec_ops, each IPsec device will have to support these ops. Signed-off-by: Raed Salem Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 6 +++--- include/linux/mlx5/driver.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 96ebaa94a92e..dacf69516002 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -126,7 +126,7 @@ enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, }; -#ifdef CONFIG_MLX5_FPGA_IPSEC +#ifdef CONFIG_MLX5_ACCEL u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); @@ -152,5 +152,5 @@ static inline int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } -#endif -#endif +#endif /* CONFIG_MLX5_ACCEL */ +#endif /* __MLX5_ACCEL_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1e6ca716635a..6a97ad601991 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -707,6 +707,9 @@ struct mlx5_core_dev { } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; +#endif +#ifdef CONFIG_MLX5_ACCEL + const struct mlx5_accel_ipsec_ops *ipsec_ops; #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; -- cgit v1.2.3 From 78fb6122fa2b6b55fafee1b32cd94913ad72f8a4 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 8 Apr 2020 20:09:05 -0500 Subject: net/mlx5: Add IPsec related Flow steering entry's fields Add FTE actions IPsec ENCRYPT/DECRYPT Add ipsec_obj_id field in FTE Add new action field MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME Signed-off-by: Huy Nguyen Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 5 ++++- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6c5aa0a21425..92d991d93757 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -207,7 +207,10 @@ struct mlx5_flow_act { u32 action; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; - uintptr_t esp_id; + union { + u32 ipsec_obj_id; + uintptr_t esp_id; + }; u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 791766e15d5c..9e64710bc54f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; - u8 reserved_at_1a[0x6]; + u8 reserved_at_1a[0x2]; + u8 ipsec_encrypt[0x1]; + u8 ipsec_decrypt[0x1]; + u8 reserved_at_1e[0x2]; + u8 termination_table_raw_traffic[0x1]; u8 reserved_at_21[0x1]; u8 log_max_ft_size[0x6]; @@ -2965,6 +2969,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 = 0x400, MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, + MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000, + MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000, }; enum { @@ -3006,7 +3012,8 @@ struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_vlan_bits push_vlan_2; - u8 reserved_at_120[0xe0]; + u8 ipsec_obj_id[0x20]; + u8 reserved_at_140[0xc0]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -5752,6 +5759,7 @@ enum { MLX5_ACTION_IN_FIELD_METADATA_REG_C_7 = 0x58, MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59, MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B, + MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { -- cgit v1.2.3 From 55f656cdb851bae32980d83d2494201e79d93b63 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 14 Jul 2020 20:03:07 +0300 Subject: net: sched: Do not drop root lock in tcf_qevent_handle() Mirred currently does not mix well with blocks executed after the qdisc root lock is taken. This includes classification blocks (such as in PRIO, ETS, DRR qdiscs) and qevents. The locking caused by the packet mirrored by mirred can cause deadlocks: either when the thread of execution attempts to take the lock a second time, or when two threads end up waiting on each other's locks. The qevent patchset attempted to not introduce further badness of this sort, and dropped the lock before executing the qevent block. However this lead to too little locking and races between qdisc configuration and packet enqueue in the RED qdisc. Before the deadlock issues are solved in a way that can be applied across many qdiscs reasonably easily, do for qevents what is done for the classification blocks and just keep holding the root lock. Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 690a7f49c8f9..d4d461236351 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -568,7 +568,7 @@ void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch); int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, struct netlink_ext_ack *extack); struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, - spinlock_t *root_lock, struct sk_buff **to_free, int *ret); + struct sk_buff **to_free, int *ret); int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe); #else static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, @@ -591,7 +591,7 @@ static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlatt static inline struct sk_buff * tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, - spinlock_t *root_lock, struct sk_buff **to_free, int *ret) + struct sk_buff **to_free, int *ret) { return skb; } -- cgit v1.2.3 From ac5c66f261b7174d0d9aaeb2bf9f8c2c2dbad0bd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 14 Jul 2020 20:03:08 +0300 Subject: Revert "net: sched: Pass root lock to Qdisc_ops.enqueue" This reverts commit aebe4426ccaa4838f36ea805cdf7d76503e65117. Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fceb3d63c925..c510b03b9751 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -57,7 +57,6 @@ struct qdisc_skb_head { struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, - spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; @@ -242,7 +241,6 @@ struct Qdisc_ops { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, - spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); @@ -790,11 +788,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch, root_lock, to_free); + return sch->enqueue(skb, sch, to_free); } static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, -- cgit v1.2.3 From e506addeff844237d60545ef4f6141de21471caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Rodr=C3=ADguez=20P=C3=A9rez?= Date: Wed, 15 Jul 2020 20:40:57 +0200 Subject: net: cdc_ether: export usbnet_cdc_update_filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the function available to other drivers, like cdc_ncm. Signed-off-by: Miguel Rodríguez Pérez Acked-by: Oliver Neukum Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index b0bff3083278..3a856963a363 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -207,6 +207,7 @@ struct cdc_state { struct usb_interface *data; }; +extern void usbnet_cdc_update_filter(struct usbnet *dev); extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); -- cgit v1.2.3 From 1ea2b748b5eb4f9b6c8f66d098f1277afb724336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 15 Jul 2020 20:40:58 +0200 Subject: net: usbnet: export usbnet_set_rx_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function can be reused by other usbnet minidrivers. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 3a856963a363..2e4f7721fc4e 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -274,6 +274,7 @@ extern int usbnet_set_link_ksettings(struct net_device *net, extern u32 usbnet_get_link(struct net_device *net); extern u32 usbnet_get_msglevel(struct net_device *); extern void usbnet_set_msglevel(struct net_device *, u32); +extern void usbnet_set_rx_mode(struct net_device *net); extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); extern int usbnet_nway_reset(struct net_device *net); -- cgit v1.2.3 From e3a5a1e8b6548f5d37328e2d3571edc5c9e6d7c0 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 16 Jul 2020 12:12:35 -0700 Subject: tcp: add SNMP counter for no. of duplicate segments reported by DSACK There are two existing SNMP counters, TCPDSACKRecv and TCPDSACKOfoRecv, which are incremented depending on whether the DSACKed range is below the cumulative ACK sequence number or not. Unfortunately, these both implicitly assume each DSACK covers only one segment. This makes these counters unusable for estimating spurious retransmit rates, or real/non-spurious loss rate. This patch introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks the estimated number of duplicate segments based on: (DSACKed sequence range) / MSS. This counter is usable for estimating spurious retransmit rates, or real/non-spurious loss rate. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 7d91f4debc48..cee9f8e6fce3 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -287,6 +287,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ + LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From ce3aa9cc5109363099b7c4ac82e2c9768afcaf31 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:22 +0200 Subject: bpf, netns: Handle multiple link attachments Extend the BPF netns link callbacks to rebuild (grow/shrink) or update the prog_array at given position when link gets attached/updated/released. This let's us lift the limit of having just one link attached for the new attach type introduced by subsequent patch. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-2-jakub@cloudflare.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 54ad426dbea1..c8c9eabcd106 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -928,6 +928,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, struct bpf_prog *old_prog); +int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index); +int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, + struct bpf_prog *prog); int bpf_prog_array_copy_info(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt); -- cgit v1.2.3 From e9ddbb7707ff5891616240026062b8c1e29864ca Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:23 +0200 Subject: bpf: Introduce SK_LOOKUP program type with a dedicated attach point Add a new program type BPF_PROG_TYPE_SK_LOOKUP with a dedicated attach type BPF_SK_LOOKUP. The new program kind is to be invoked by the transport layer when looking up a listening socket for a new connection request for connection oriented protocols, or when looking up an unconnected socket for a packet for connection-less protocols. When called, SK_LOOKUP BPF program can select a socket that will receive the packet. This serves as a mechanism to overcome the limits of what bind() API allows to express. Two use-cases driving this work are: (1) steer packets destined to an IP range, on fixed port to a socket 192.0.2.0/24, port 80 -> NGINX socket (2) steer packets destined to an IP address, on any port to a socket 198.51.100.1, any port -> L7 proxy socket In its run-time context program receives information about the packet that triggered the socket lookup. Namely IP version, L4 protocol identifier, and address 4-tuple. Context can be further extended to include ingress interface identifier. To select a socket BPF program fetches it from a map holding socket references, like SOCKMAP or SOCKHASH, and calls bpf_sk_assign(ctx, sk, ...) helper to record the selection. Transport layer then uses the selected socket as a result of socket lookup. In its basic form, SK_LOOKUP acts as a filter and hence must return either SK_PASS or SK_DROP. If the program returns with SK_PASS, transport should look for a socket to receive the packet, or use the one selected by the program if available, while SK_DROP informs the transport layer that the lookup should fail. This patch only enables the user to attach an SK_LOOKUP program to a network namespace. Subsequent patches hook it up to run on local delivery path in ipv4 and ipv6 stacks. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-3-jakub@cloudflare.com --- include/linux/bpf-netns.h | 3 ++ include/linux/bpf.h | 1 + include/linux/bpf_types.h | 2 ++ include/linux/filter.h | 17 +++++++++++ include/uapi/linux/bpf.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 100 insertions(+) (limited to 'include') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 47d5b0c708c9..722f799c1a2e 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -8,6 +8,7 @@ enum netns_bpf_attach_type { NETNS_BPF_INVALID = -1, NETNS_BPF_FLOW_DISSECTOR = 0, + NETNS_BPF_SK_LOOKUP, MAX_NETNS_BPF_ATTACH_TYPE }; @@ -17,6 +18,8 @@ to_netns_bpf_attach_type(enum bpf_attach_type attach_type) switch (attach_type) { case BPF_FLOW_DISSECTOR: return NETNS_BPF_FLOW_DISSECTOR; + case BPF_SK_LOOKUP: + return NETNS_BPF_SK_LOOKUP; default: return NETNS_BPF_INVALID; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c8c9eabcd106..adb16bdc5f0a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -249,6 +249,7 @@ enum bpf_arg_type { ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ + ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a18ae82a298a..a52a5688418e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -64,6 +64,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, struct sk_reuseport_md, struct sk_reuseport_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup, + struct bpf_sk_lookup, struct bpf_sk_lookup_kern) #endif #if defined(CONFIG_BPF_JIT) BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, diff --git a/include/linux/filter.h b/include/linux/filter.h index 0b0144752d78..fa1ea12ad2cd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1278,4 +1278,21 @@ struct bpf_sockopt_kern { s32 retval; }; +struct bpf_sk_lookup_kern { + u16 family; + u16 protocol; + struct { + __be32 saddr; + __be32 daddr; + } v4; + struct { + const struct in6_addr *saddr; + const struct in6_addr *daddr; + } v6; + __be16 sport; + u16 dport; + struct sock *selected_sk; + bool no_reuseport; +}; + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7ac3992dacfe..54d0c886e3ba 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -189,6 +189,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, }; enum bpf_attach_type { @@ -228,6 +229,7 @@ enum bpf_attach_type { BPF_XDP_DEVMAP, BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, + BPF_SK_LOOKUP, __MAX_BPF_ATTACH_TYPE }; @@ -3069,6 +3071,10 @@ union bpf_attr { * * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and + * **BPF_PROG_TYPE_SCHED_ACT** programs. + * * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, * will cause *skb* to be delivered to the specified socket. @@ -3094,6 +3100,56 @@ union bpf_attr { * **-ESOCKTNOSUPPORT** if the socket type is not supported * (reuseport). * + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) + * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. + * + * Select the *sk* as a result of a socket lookup. + * + * For the operation to succeed passed socket must be compatible + * with the packet description provided by the *ctx* object. + * + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must + * be an exact match. While IP family (**AF_INET** or + * **AF_INET6**) must be compatible, that is IPv6 sockets + * that are not v6-only can be selected for IPv4 packets. + * + * Only TCP listeners and UDP unconnected sockets can be + * selected. *sk* can also be NULL to reset any previous + * selection. + * + * *flags* argument can combination of following values: + * + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous + * socket selection, potentially done by a BPF program + * that ran before us. + * + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip + * load-balancing within reuseport group for the socket + * being selected. + * + * On success *ctx->sk* will point to the selected socket. + * + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is + * not compatible with packet family (*ctx->family*). + * + * * **-EEXIST** if socket has been already selected, + * potentially by another program, and + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. + * + * * **-EINVAL** if unsupported flags were specified. + * + * * **-EPROTOTYPE** if socket L4 protocol + * (*sk->protocol*) doesn't match packet protocol + * (*ctx->protocol*). + * + * * **-ESOCKTNOSUPPORT** if socket is not in allowed + * state (TCP listening or UDP unconnected). + * * u64 bpf_ktime_get_boot_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. @@ -3607,6 +3663,12 @@ enum { BPF_RINGBUF_HDR_SZ = 8, }; +/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ +enum { + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), +}; + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -4349,4 +4411,19 @@ struct bpf_pidns_info { __u32 pid; __u32 tgid; }; + +/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ +struct bpf_sk_lookup { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ + __u32 remote_ip4; /* Network byte order */ + __u32 remote_ip6[4]; /* Network byte order */ + __u32 remote_port; /* Network byte order */ + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 1559b4aa1db443096af493c7d621dc156054babe Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:25 +0200 Subject: inet: Run SK_LOOKUP BPF program on socket lookup Run a BPF program before looking up a listening socket on the receive path. Program selects a listening socket to yield as result of socket lookup by calling bpf_sk_assign() helper and returning SK_PASS code. Program can revert its decision by assigning a NULL socket with bpf_sk_assign(). Alternatively, BPF program can also fail the lookup by returning with SK_DROP, or let the lookup continue as usual with SK_PASS on return, when no socket has been selected with bpf_sk_assign(). This lets the user match packets with listening sockets freely at the last possible point on the receive path, where we know that packets are destined for local delivery after undergoing policing, filtering, and routing. With BPF code selecting the socket, directing packets destined to an IP range or to a port range to a single socket becomes possible. In case multiple programs are attached, they are run in series in the order in which they were attached. The end result is determined from return codes of all the programs according to following rules: 1. If any program returned SK_PASS and selected a valid socket, the socket is used as result of socket lookup. 2. If more than one program returned SK_PASS and selected a socket, last selection takes effect. 3. If any program returned SK_DROP, and no program returned SK_PASS and selected a socket, socket lookup fails with -ECONNREFUSED. 4. If all programs returned SK_PASS and none of them selected a socket, socket lookup continues to htable-based lookup. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-5-jakub@cloudflare.com --- include/linux/filter.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa1ea12ad2cd..c4f54c216347 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1295,4 +1295,95 @@ struct bpf_sk_lookup_kern { bool no_reuseport; }; +extern struct static_key_false bpf_sk_lookup_enabled; + +/* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. + * + * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and + * SK_DROP. Their meaning is as follows: + * + * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result + * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup + * SK_DROP : terminate lookup with -ECONNREFUSED + * + * This macro aggregates return values and selected sockets from + * multiple BPF programs according to following rules in order: + * + * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk, + * macro result is SK_PASS and last ctx.selected_sk is used. + * 2. If any program returned SK_DROP return value, + * macro result is SK_DROP. + * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL. + * + * Caller must ensure that the prog array is non-NULL, and that the + * array as well as the programs it contains remain valid. + */ +#define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_sk_lookup_kern *_ctx = &(ctx); \ + struct bpf_prog_array_item *_item; \ + struct sock *_selected_sk = NULL; \ + bool _no_reuseport = false; \ + struct bpf_prog *_prog; \ + bool _all_pass = true; \ + u32 _ret; \ + \ + migrate_disable(); \ + _item = &(array)->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + /* restore most recent selection */ \ + _ctx->selected_sk = _selected_sk; \ + _ctx->no_reuseport = _no_reuseport; \ + \ + _ret = func(_prog, _ctx); \ + if (_ret == SK_PASS && _ctx->selected_sk) { \ + /* remember last non-NULL socket */ \ + _selected_sk = _ctx->selected_sk; \ + _no_reuseport = _ctx->no_reuseport; \ + } else if (_ret == SK_DROP && _all_pass) { \ + _all_pass = false; \ + } \ + _item++; \ + } \ + _ctx->selected_sk = _selected_sk; \ + _ctx->no_reuseport = _no_reuseport; \ + migrate_enable(); \ + _all_pass || _selected_sk ? SK_PASS : SK_DROP; \ + }) + +static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const u16 dport, + struct sock **psk) +{ + struct bpf_prog_array *run_array; + struct sock *selected_sk = NULL; + bool no_reuseport = false; + + rcu_read_lock(); + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); + if (run_array) { + struct bpf_sk_lookup_kern ctx = { + .family = AF_INET, + .protocol = protocol, + .v4.saddr = saddr, + .v4.daddr = daddr, + .sport = sport, + .dport = dport, + }; + u32 act; + + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); + if (act == SK_PASS) { + selected_sk = ctx.selected_sk; + no_reuseport = ctx.no_reuseport; + } else { + selected_sk = ERR_PTR(-ECONNREFUSED); + } + } + rcu_read_unlock(); + *psk = selected_sk; + return no_reuseport; +} + #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From 1122702f02678597c4f1c7d316365ef502aafe08 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:27 +0200 Subject: inet6: Run SK_LOOKUP BPF program on socket lookup Following ipv4 stack changes, run a BPF program attached to netns before looking up a listening socket. Program can return a listening socket to use as result of socket lookup, fail the lookup, or take no action. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-7-jakub@cloudflare.com --- include/linux/filter.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index c4f54c216347..8252572db918 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1386,4 +1386,43 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, return no_reuseport; } +#if IS_ENABLED(CONFIG_IPV6) +static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 dport, + struct sock **psk) +{ + struct bpf_prog_array *run_array; + struct sock *selected_sk = NULL; + bool no_reuseport = false; + + rcu_read_lock(); + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); + if (run_array) { + struct bpf_sk_lookup_kern ctx = { + .family = AF_INET6, + .protocol = protocol, + .v6.saddr = saddr, + .v6.daddr = daddr, + .sport = sport, + .dport = dport, + }; + u32 act; + + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); + if (act == SK_PASS) { + selected_sk = ctx.selected_sk; + no_reuseport = ctx.no_reuseport; + } else { + selected_sk = ERR_PTR(-ECONNREFUSED); + } + } + rcu_read_unlock(); + *psk = selected_sk; + return no_reuseport; +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ + #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From c4471ad9a50d5548e66ae4511acfb1dc23a48744 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 20 Jul 2020 00:03:33 +0200 Subject: net: phy: add USXGMII link partner ability constants The constants are taken from the USXGMII Singleport Copper Interface specification. The naming are based on the SGMII ones, but with an MDIO_ prefix. Signed-off-by: Michael Walle Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 4bcb41c71b8c..3f302e2523b2 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -324,4 +324,30 @@ static inline __u16 mdio_phy_id_c45(int prtad, int devad) return MDIO_PHY_ID_C45 | (prtad << 5) | devad; } +/* UsxgmiiChannelInfo[15:0] for USXGMII in-band auto-negotiation.*/ +#define MDIO_USXGMII_EEE_CLK_STP 0x0080 /* EEE clock stop supported */ +#define MDIO_USXGMII_EEE 0x0100 /* EEE supported */ +#define MDIO_USXGMII_SPD_MASK 0x0e00 /* USXGMII speed mask */ +#define MDIO_USXGMII_FULL_DUPLEX 0x1000 /* USXGMII full duplex */ +#define MDIO_USXGMII_DPX_SPD_MASK 0x1e00 /* USXGMII duplex and speed bits */ +#define MDIO_USXGMII_10 0x0000 /* 10Mbps */ +#define MDIO_USXGMII_10HALF 0x0000 /* 10Mbps half-duplex */ +#define MDIO_USXGMII_10FULL 0x1000 /* 10Mbps full-duplex */ +#define MDIO_USXGMII_100 0x0200 /* 100Mbps */ +#define MDIO_USXGMII_100HALF 0x0200 /* 100Mbps half-duplex */ +#define MDIO_USXGMII_100FULL 0x1200 /* 100Mbps full-duplex */ +#define MDIO_USXGMII_1000 0x0400 /* 1000Mbps */ +#define MDIO_USXGMII_1000HALF 0x0400 /* 1000Mbps half-duplex */ +#define MDIO_USXGMII_1000FULL 0x1400 /* 1000Mbps full-duplex */ +#define MDIO_USXGMII_10G 0x0600 /* 10Gbps */ +#define MDIO_USXGMII_10GHALF 0x0600 /* 10Gbps half-duplex */ +#define MDIO_USXGMII_10GFULL 0x1600 /* 10Gbps full-duplex */ +#define MDIO_USXGMII_2500 0x0800 /* 2500Mbps */ +#define MDIO_USXGMII_2500HALF 0x0800 /* 2500Mbps half-duplex */ +#define MDIO_USXGMII_2500FULL 0x1800 /* 2500Mbps full-duplex */ +#define MDIO_USXGMII_5000 0x0a00 /* 5000Mbps */ +#define MDIO_USXGMII_5000HALF 0x0a00 /* 5000Mbps half-duplex */ +#define MDIO_USXGMII_5000FULL 0x1a00 /* 5000Mbps full-duplex */ +#define MDIO_USXGMII_LINK 0x8000 /* PHY link with copper-side partner */ + #endif /* _UAPI__LINUX_MDIO_H__ */ -- cgit v1.2.3 From a06d30ae7af492497ffbca6abf1621d508b8fcaa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:10 +0200 Subject: net/atm: remove the atmdev_ops {get, set}sockopt methods All implementations of these two methods are dummies that always return -EINVAL. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/atmdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 8124815eb121..5d5ff2203fa2 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -176,11 +176,6 @@ struct atm_dev { #define ATM_OF_IMMED 1 /* Attempt immediate delivery */ #define ATM_OF_INRATE 2 /* Attempt in-rate delivery */ - -/* - * ioctl, getsockopt, and setsockopt are optional and can be set to NULL. - */ - struct atmdev_ops { /* only send is required */ void (*dev_close)(struct atm_dev *dev); int (*open)(struct atm_vcc *vcc); @@ -190,10 +185,6 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif - int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, - void __user *optval,int optlen); - int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, - void __user *optval,unsigned int optlen); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, -- cgit v1.2.3 From 4d295e54611509854a12c26f95a6f4430731d614 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:13 +0200 Subject: net: simplify cBPF setsockopt compat handling Add a helper that copies either a native or compat bpf_fprog from userspace after verifying the length, and remove the compat setsockopt handlers that now aren't required. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ++-- include/net/compat.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0b0144752d78..4d049c8e1fbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -502,13 +502,11 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) offsetof(TYPE, MEMBER); \ }) -#ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { u16 len; compat_uptr_t filter; /* struct sock_filter * */ }; -#endif struct sock_fprog_kern { u16 len; @@ -1278,4 +1276,6 @@ struct bpf_sockopt_kern { s32 retval; }; +int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len); + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/net/compat.h b/include/net/compat.h index f241666117d8..745db0d605b6 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -61,7 +61,6 @@ int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, compat_size_t *len); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); -struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); int put_cmsg_compat(struct msghdr*, int, int, int, void *); int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, -- cgit v1.2.3 From 8c918ffbbad49454ed26c53eb1b90bf98bb5e394 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:14 +0200 Subject: net: remove compat_sock_common_{get,set}sockopt Add the compat handling to sock_common_{get,set}sockopt instead, keyed of in_compat_syscall(). This allow to remove the now unused ->compat_{get,set}sockopt methods from struct proto_ops. Signed-off-by: Christoph Hellwig Acked-by: Matthieu Baerts Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- include/linux/net.h | 6 ------ include/net/sock.h | 4 ---- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 016a9c5faa34..858ff1d98154 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -165,12 +165,6 @@ struct proto_ops { int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -#ifdef CONFIG_COMPAT - int (*compat_setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, unsigned int optlen); - int (*compat_getsockopt)(struct socket *sock, int level, - int optname, char __user *optval, int __user *optlen); -#endif void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); diff --git a/include/net/sock.h b/include/net/sock.h index 4bf884165148..1fd7cf5fc751 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1744,10 +1744,6 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); -int compat_sock_common_getsockopt(struct socket *sock, int level, - int optname, char __user *optval, int __user *optlen); -int compat_sock_common_setsockopt(struct socket *sock, int level, - int optname, char __user *optval, unsigned int optlen); void sk_common_release(struct sock *sk); -- cgit v1.2.3 From 55db9c0e853421fa71cac5e6855898601f78a1f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:15 +0200 Subject: net: remove compat_sys_{get,set}sockopt Now that the ->compat_{get,set}sockopt proto_ops methods are gone there is no good reason left to keep the compat syscalls separate. This fixes the odd use of unsigned int for the compat_setsockopt optlen and the missing sock_use_custom_sol_socket. It would also easily allow running the eBPF hooks for the compat syscalls, but such a large change in behavior does not belong into a consolidation patch like this one. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/compat.h | 4 ---- include/linux/syscalls.h | 4 ++++ include/uapi/asm-generic/unistd.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index e90100c0de72..c4255d8a4a8a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -737,10 +737,6 @@ asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned flags, struct sockaddr __user *addr, int __user *addrlen); -asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen); -asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen); asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b951a87da987..aa46825c6f9d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1424,4 +1424,8 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct old_timespec32 __user *timeout); +int __sys_getsockopt(int fd, int level, int optname, char __user *optval, + int __user *optlen); +int __sys_setsockopt(int fd, int level, int optname, char __user *optval, + int optlen); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f4a01305d9a6..c8c189a5f0a6 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -606,9 +606,9 @@ __SYSCALL(__NR_sendto, sys_sendto) #define __NR_recvfrom 207 __SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom) #define __NR_setsockopt 208 -__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt) +__SC_COMP(__NR_setsockopt, sys_setsockopt, sys_setsockopt) #define __NR_getsockopt 209 -__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt) +__SC_COMP(__NR_getsockopt, sys_getsockopt, sys_getsockopt) #define __NR_shutdown 210 __SYSCALL(__NR_shutdown, sys_shutdown) #define __NR_sendmsg 211 -- cgit v1.2.3 From 77d4df41d53e5c2af14db26f20fe50da52e382ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:20 +0200 Subject: netfilter: remove the compat_{get,set} methods All instances handle compat sockopts via in_compat_syscall() now, so remove the compat_{get,set} methods as well as the compat_nf_{get,set}sockopt wrappers. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index eb312e7ca36e..711b4d4486f0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -164,17 +164,9 @@ struct nf_sockopt_ops { int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len); -#ifdef CONFIG_COMPAT - int (*compat_set)(struct sock *sk, int optval, - void __user *user, unsigned int len); -#endif int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); -#ifdef CONFIG_COMPAT - int (*compat_get)(struct sock *sk, int optval, - void __user *user, int *len); -#endif /* Use the module struct to lock set/get code in place */ struct module *owner; }; @@ -350,12 +342,6 @@ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -#ifdef CONFIG_COMPAT -int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, - char __user *opt, unsigned int len); -int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, - char __user *opt, int *len); -#endif struct flowi; struct nf_queue_entry; -- cgit v1.2.3 From c34bc10d2535719ddf77d44ee849f6c7589583ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:21 +0200 Subject: netfilter: remove the compat argument to xt_copy_counters_from_user Lift the in_compat_syscall() from the callers instead. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5da88451853b..b8b943ee7b8b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -302,7 +302,7 @@ int xt_data_to_user(void __user *dst, const void *src, int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, - struct xt_counters_info *info, bool compat); + struct xt_counters_info *info); struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, -- cgit v1.2.3 From b6238c04c0e5dbe7ae4ea48e96e004905b120a04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:26 +0200 Subject: net/ipv4: remove compat_ip_{get,set}sockopt Handle the few cases that need special treatment in-line using in_compat_syscall(). Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/ip.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 862c9545833a..3d34acc95ca8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -727,10 +727,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int compat_ip_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -int compat_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -- cgit v1.2.3 From 3021ad529950d07e0408d65d0f1df00454c1d223 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:30 +0200 Subject: net/ipv6: remove compat_ipv6_{get,set}sockopt Handle the few cases that need special treatment in-line using in_compat_syscall(). This also removes all the now unused compat_{get,set}sockopt methods. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 13 ------------- include/net/ipv6.h | 4 ---- include/net/sctp/structs.h | 10 ---------- include/net/sock.h | 8 -------- include/net/tcp.h | 4 ---- 5 files changed, 39 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e5b388f5fa20..157c60cca0ca 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -48,14 +48,6 @@ struct inet_connection_sock_af_ops { char __user *optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -#ifdef CONFIG_COMPAT - int (*compat_setsockopt)(struct sock *sk, - int level, int optname, - char __user *optval, unsigned int optlen); - int (*compat_getsockopt)(struct sock *sk, - int level, int optname, - char __user *optval, int __user *optlen); -#endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); void (*mtu_reduced)(struct sock *sk); }; @@ -311,11 +303,6 @@ void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); -int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); - struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5e65bf2fd32d..262fc88dbd7e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1088,10 +1088,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9bbb2f60db92..233bbf7df5d6 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -438,16 +438,6 @@ struct sctp_af { int optname, char __user *optval, int __user *optlen); - int (*compat_setsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - unsigned int optlen); - int (*compat_getsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int __user *optlen); void (*get_dst) (struct sctp_transport *t, union sctp_addr *saddr, struct flowi *fl, diff --git a/include/net/sock.h b/include/net/sock.h index 1fd7cf5fc751..3bd8bc578bf3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1147,14 +1147,6 @@ struct proto { int __user *option); void (*keepalive)(struct sock *sk, int valbool); #ifdef CONFIG_COMPAT - int (*compat_setsockopt)(struct sock *sk, - int level, - int optname, char __user *optval, - unsigned int optlen); - int (*compat_getsockopt)(struct sock *sk, - int level, - int optname, char __user *optval, - int __user *option); int (*compat_ioctl)(struct sock *sk, unsigned int cmd, unsigned long arg); #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index d62e24533518..9f7f7c0c1104 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -401,10 +401,6 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); -int compat_tcp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int compat_tcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, -- cgit v1.2.3 From a44d9e72100f7044ac46e4e6dc475f5b4097830f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:31 +0200 Subject: net: make ->{get,set}sockopt in proto_ops optional Just check for a NULL method instead of wiring up sock_no_{get,set}sockopt. Signed-off-by: Christoph Hellwig Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- include/net/sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 3bd8bc578bf3..62e18fc8ac9f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1714,8 +1714,6 @@ int sock_no_getname(struct socket *, struct sockaddr *, int); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); int sock_no_shutdown(struct socket *, int); -int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); -int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); -- cgit v1.2.3 From 2f0bc54ba9a85032f2402faf0fccab513848300a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Jul 2020 00:16:29 +0200 Subject: xdp: introduce xdp_get_shared_info_from_{buff, frame} utility routines Introduce xdp_get_shared_info_from_{buff,frame} utility routines to get skb_shared_info from xdp buffer/frame pointer. xdp_get_shared_info_from_{buff,frame} will be used to implement xdp multi-buffer support Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- include/net/xdp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 609f819ed08b..d3005bef812f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -85,6 +85,12 @@ struct xdp_buff { ((xdp)->data_hard_start + (xdp)->frame_sz - \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +static inline struct skb_shared_info * +xdp_get_shared_info_from_buff(struct xdp_buff *xdp) +{ + return (struct skb_shared_info *)xdp_data_hard_end(xdp); +} + struct xdp_frame { void *data; u16 len; @@ -98,6 +104,15 @@ struct xdp_frame { struct net_device *dev_rx; /* used by cpumap */ }; +static inline struct skb_shared_info * +xdp_get_shared_info_from_frame(struct xdp_frame *frame) +{ + void *data_hard_start = frame->data - frame->headroom - sizeof(*frame); + + return (struct skb_shared_info *)(data_hard_start + frame->frame_sz - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); +} + /* Clear kernel pointers in xdp_frame */ static inline void xdp_scrub_frame(struct xdp_frame *frame) { -- cgit v1.2.3 From eba75c587e811d3249c8bd50d22bb2266ccd3c0f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 10 Jul 2020 09:29:02 -0400 Subject: icmp: support rfc 4884 Add setsockopt SOL_IP/IP_RECVERR_4884 to return the offset to an extension struct if present. ICMP messages may include an extension structure after the original datagram. RFC 4884 standardized this behavior. It stores the offset in words to the extension header in u8 icmphdr.un.reserved[1]. The field is valid only for ICMP types destination unreachable, time exceeded and parameter problem, if length is at least 128 bytes and entire packet does not exceed 576 bytes. Return the offset to the start of the extension struct when reading an ICMP error from the error queue, if it matches the above constraints. Do not return the raw u8 field. Return the offset from the start of the user buffer, in bytes. The kernel does not return the network and transport headers, so subtract those. Also validate the headers. Return the offset regardless of validation, as an invalid extension must still not be misinterpreted as part of the original datagram. Note that !invalid does not imply valid. If the extension version does not match, no validation can take place, for instance. For backward compatibility, make this optional, set by setsockopt SOL_IP/IP_RECVERR_RFC4884. For API example and feature test, see github.com/wdebruij/kerneltools/blob/master/tests/recv_icmp_v2.c For forward compatibility, reserve only setsockopt value 1, leaving other bits for additional icmp extensions. Changes v1->v2: - convert word offset to byte offset from start of user buffer - return in ee_data as u8 may be insufficient - define extension struct and object header structs - return len only if constraints met - if returning len, also validate Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/icmp.h | 4 ++++ include/net/inet_sock.h | 1 + include/uapi/linux/errqueue.h | 14 +++++++++++++- include/uapi/linux/icmp.h | 22 ++++++++++++++++++++++ include/uapi/linux/in.h | 1 + 5 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmp.h b/include/linux/icmp.h index 81ca84ce3119..8fc38a34cb20 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -15,6 +15,7 @@ #include #include +#include static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) { @@ -35,4 +36,7 @@ static inline bool icmp_is_err(int type) return false; } +void ip_icmp_error_rfc4884(const struct sk_buff *skb, + struct sock_ee_data_rfc4884 *out); + #endif /* _LINUX_ICMP_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a7ce00af6c44..a3702d1d4875 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -225,6 +225,7 @@ struct inet_sock { mc_all:1, nodefrag:1; __u8 bind_address_no_port:1, + recverr_rfc4884:1, defer_connect:1; /* Indicates that fastopen_connect is set * and cookie exists so we defer connect * until first data frame is written diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index ca5cb3e3c6df..3c70e8ac14b8 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -5,6 +5,13 @@ #include #include +/* RFC 4884: return offset to extension struct + validation */ +struct sock_ee_data_rfc4884 { + __u16 len; + __u8 flags; + __u8 reserved; +}; + struct sock_extended_err { __u32 ee_errno; __u8 ee_origin; @@ -12,7 +19,10 @@ struct sock_extended_err { __u8 ee_code; __u8 ee_pad; __u32 ee_info; - __u32 ee_data; + union { + __u32 ee_data; + struct sock_ee_data_rfc4884 ee_rfc4884; + }; }; #define SO_EE_ORIGIN_NONE 0 @@ -31,6 +41,8 @@ struct sock_extended_err { #define SO_EE_CODE_TXTIME_INVALID_PARAM 1 #define SO_EE_CODE_TXTIME_MISSED 2 +#define SO_EE_RFC4884_FLAG_INVALID 1 + /** * struct scm_timestamping - timestamps exposed through cmsg * diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index 5589eeb791ca..fb169a50895e 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -19,6 +19,7 @@ #define _UAPI_LINUX_ICMP_H #include +#include #define ICMP_ECHOREPLY 0 /* Echo Reply */ #define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ @@ -95,5 +96,26 @@ struct icmp_filter { __u32 data; }; +/* RFC 4884 extension struct: one per message */ +struct icmp_ext_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 reserved1:4, + version:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:4, + reserved1:4; +#else +#error "Please fix " +#endif + __u8 reserved2; + __sum16 checksum; +}; + +/* RFC 4884 extension object header: one for each object */ +struct icmp_extobj_hdr { + __be16 length; + __u8 class_num; + __u8 class_type; +}; #endif /* _UAPI_LINUX_ICMP_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 8533bf07450f..3d0d8231dc19 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -123,6 +123,7 @@ struct in_addr { #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ -- cgit v1.2.3 From f65b71aa25a65e13cf3d10445a48c63d3eeb942e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Jul 2020 01:45:29 +0300 Subject: ptp: add ability to configure duty cycle for periodic output There are external event timestampers (PHCs with support for PTP_EXTTS_REQUEST) that timestamp both event edges. When those edges are very close (such as in the case of a short pulse), there is a chance that the collected timestamp might be of the rising, or of the falling edge, we never know. There are also PHCs capable of generating periodic output with a configurable duty cycle. This is good news, because we can space the rising and falling edge out enough in time, that the risks to overrun the 1-entry timestamp FIFO of the extts PHC are lower (example: the perout PHC can be configured for a period of 1 second, and an "on" time of 0.5 seconds, resulting in a duty cycle of 50%). A flag is introduced for signaling that an on time is present in the perout request structure, for preserving compatibility. Logically speaking, the duty cycle cannot exceed 100% and the PTP core checks for this. PHC drivers that don't support this flag emit a periodic output of an unspecified duty cycle, same as before. The duty cycle is encoded as an "on" time, similar to the "start" and "period" times, and reuses the reserved space while preserving overall binary layout. Pahole reported before: struct ptp_perout_request { struct ptp_clock_time start; /* 0 16 */ struct ptp_clock_time period; /* 16 16 */ unsigned int index; /* 32 4 */ unsigned int flags; /* 36 4 */ unsigned int rsv[4]; /* 40 16 */ /* size: 56, cachelines: 1, members: 5 */ /* last cacheline: 56 bytes */ }; And now: struct ptp_perout_request { struct ptp_clock_time start; /* 0 16 */ struct ptp_clock_time period; /* 16 16 */ unsigned int index; /* 32 4 */ unsigned int flags; /* 36 4 */ union { struct ptp_clock_time on; /* 40 16 */ unsigned int rsv[4]; /* 40 16 */ }; /* 40 16 */ /* size: 56, cachelines: 1, members: 5 */ /* last cacheline: 56 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index ff070aa64278..1d2841155f7d 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -53,12 +53,14 @@ /* * Bits of the ptp_perout_request.flags field: */ -#define PTP_PEROUT_ONE_SHOT (1<<0) +#define PTP_PEROUT_ONE_SHOT (1<<0) +#define PTP_PEROUT_DUTY_CYCLE (1<<1) /* * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl. */ -#define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT) +#define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT | \ + PTP_PEROUT_DUTY_CYCLE) /* * No flags are valid for the original PTP_PEROUT_REQUEST ioctl @@ -105,7 +107,16 @@ struct ptp_perout_request { struct ptp_clock_time period; /* Desired period, zero means disable. */ unsigned int index; /* Which channel to configure. */ unsigned int flags; - unsigned int rsv[4]; /* Reserved for future use. */ + union { + /* + * The "on" time of the signal. + * Must be lower than the period. + * Valid only if (flags & PTP_PEROUT_DUTY_CYCLE) is set. + */ + struct ptp_clock_time on; + /* Reserved for future use. */ + unsigned int rsv[4]; + }; }; #define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement samples. */ -- cgit v1.2.3 From b6bd41363a1ca39282496803cc32f7515ed917fe Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Jul 2020 01:45:30 +0300 Subject: ptp: introduce a phase offset in the periodic output request Some PHCs like the ocelot/felix switch cannot emit generic periodic output, but just PPS (pulse per second) signals, which: - don't start from arbitrary absolute times, but are rather phase-aligned to the beginning of [the closest next] second. - have an optional phase offset relative to that beginning of the second. For those, it was initially established that they should reject any other absolute time for the PTP_PEROUT_REQUEST than 0.000000000 [1]. But when it actually came to writing an application [2] that makes use of this functionality, we realized that we can't really deal generically with PHCs that support absolute start time, and with PHCs that don't, without an explicit interface. Namely, in an ideal world, PHC drivers would ensure that the "perout.start" value written to hardware will result in a functional output. This means that if the PTP time has become in the past of this PHC's current time, it should be automatically fast-forwarded by the driver into a close enough future time that is known to work (note: this is necessary only if the hardware doesn't do this fast-forward by itself). But we don't really know what is the status for PHC drivers in use today, so in the general sense, user space would be risking to have a non-functional periodic output if it simply asked for a start time of 0.000000000. So let's introduce a flag for this type of reduced-functionality hardware, named PTP_PEROUT_PHASE. The start time is just "soon", the only thing we know for sure about this signal is that its rising edge events, Rn, occur at: Rn = perout.phase + n * perout.period The "phase" in the periodic output structure is simply an alias to the "start" time, since both cannot logically be specified at the same time. Therefore, the binary layout of the structure is not affected. [1]: https://patchwork.ozlabs.org/project/netdev/patch/20200320103726.32559-7-yangbo.lu@nxp.com/ [2]: https://www.mail-archive.com/linuxptp-devel@lists.sourceforge.net/msg04142.html Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 1d2841155f7d..1d108d597f66 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -55,12 +55,14 @@ */ #define PTP_PEROUT_ONE_SHOT (1<<0) #define PTP_PEROUT_DUTY_CYCLE (1<<1) +#define PTP_PEROUT_PHASE (1<<2) /* * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl. */ #define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT | \ - PTP_PEROUT_DUTY_CYCLE) + PTP_PEROUT_DUTY_CYCLE | \ + PTP_PEROUT_PHASE) /* * No flags are valid for the original PTP_PEROUT_REQUEST ioctl @@ -103,7 +105,20 @@ struct ptp_extts_request { }; struct ptp_perout_request { - struct ptp_clock_time start; /* Absolute start time. */ + union { + /* + * Absolute start time. + * Valid only if (flags & PTP_PEROUT_PHASE) is unset. + */ + struct ptp_clock_time start; + /* + * Phase offset. The signal should start toggling at an + * unspecified integer multiple of the period, plus this value. + * The start time should be "as soon as possible". + * Valid only if (flags & PTP_PEROUT_PHASE) is set. + */ + struct ptp_clock_time phase; + }; struct ptp_clock_time period; /* Desired period, zero means disable. */ unsigned int index; /* Which channel to configure. */ unsigned int flags; -- cgit v1.2.3 From 4cfab3566710826e62adbf100875d2dca32434b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 19 Jul 2020 20:49:52 -0700 Subject: net: dsa: Add wrappers for overloaded ndo_ops Add definitions for the dsa_netdevice_ops structure which is a subset of the net_device_ops structure for the specific operations that we care about overlaying on top of the DSA CPU port net_device and provide inline stubs that take core managing whether DSA code is reachable. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6fa418ff1175..343642ca4f63 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -86,6 +86,18 @@ struct dsa_device_ops { enum dsa_tag_protocol proto; }; +/* This structure defines the control interfaces that are overlayed by the + * DSA layer on top of the DSA CPU/management net_device instance. This is + * used by the core net_device layer while calling various net_device_ops + * function pointers. + */ +struct dsa_netdevice_ops { + int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, + int cmd); + int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, + size_t len); +}; + #define DSA_TAG_DRIVER_ALIAS "dsa_tag-" #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) @@ -217,6 +229,7 @@ struct dsa_port { /* * Original copy of the master netdev net_device_ops */ + const struct dsa_netdevice_ops *netdev_ops; const struct net_device_ops *orig_ndo_ops; bool setup; @@ -679,6 +692,63 @@ static inline bool dsa_can_decode(const struct sk_buff *skb, return false; } +#if IS_ENABLED(CONFIG_NET_DSA) +static inline int __dsa_netdevice_ops_check(struct net_device *dev) +{ + int err = -EOPNOTSUPP; + + if (!dev->dsa_ptr) + return err; + + if (!dev->dsa_ptr->netdev_ops) + return err; + + return 0; +} + +static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + const struct dsa_netdevice_ops *ops; + int err; + + err = __dsa_netdevice_ops_check(dev); + if (err) + return err; + + ops = dev->dsa_ptr->netdev_ops; + + return ops->ndo_do_ioctl(dev, ifr, cmd); +} + +static inline int dsa_ndo_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct dsa_netdevice_ops *ops; + int err; + + err = __dsa_netdevice_ops_check(dev); + if (err) + return err; + + ops = dev->dsa_ptr->netdev_ops; + + return ops->ndo_get_phys_port_name(dev, name, len); +} +#else +static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + return -EOPNOTSUPP; +} + +static inline int dsa_ndo_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + return -EOPNOTSUPP; +} +#endif + void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); -- cgit v1.2.3 From 9c0c7014f38206a2b63e7e832edf2e881a7b49ad Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 19 Jul 2020 20:49:54 -0700 Subject: net: dsa: Setup dsa_netdev_ops Now that we have all the infrastructure in place for calling into the dsa_ptr->netdev_ops function pointers, install them when we configure the DSA CPU/management interface and tear them down. The flow is unchanged from before, but now we preserve equality of tests when network device drivers do tests like dev->netdev_ops == &foo_ops which was not the case before since we were allocating an entirely new structure. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 343642ca4f63..f1b63d06d132 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -230,7 +230,6 @@ struct dsa_port { * Original copy of the master netdev net_device_ops */ const struct dsa_netdevice_ops *netdev_ops; - const struct net_device_ops *orig_ndo_ops; bool setup; }; -- cgit v1.2.3 From a8b7b2d0b3fc965d823e362840e5451e2eb4a71b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 20 Jul 2020 10:10:41 +0200 Subject: sched: sch_api: add missing rcu read lock to silence the warning In case the qdisc_match_from_root function() is called from non-rcu path with rtnl mutex held, a suspiciout rcu usage warning appears: [ 241.504354] ============================= [ 241.504358] WARNING: suspicious RCU usage [ 241.504366] 5.8.0-rc4-custom-01521-g72a7c7d549c3 #32 Not tainted [ 241.504370] ----------------------------- [ 241.504378] net/sched/sch_api.c:270 RCU-list traversed in non-reader section!! [ 241.504382] other info that might help us debug this: [ 241.504388] rcu_scheduler_active = 2, debug_locks = 1 [ 241.504394] 1 lock held by tc/1391: [ 241.504398] #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 [ 241.504431] stack backtrace: [ 241.504440] CPU: 0 PID: 1391 Comm: tc Not tainted 5.8.0-rc4-custom-01521-g72a7c7d549c3 #32 [ 241.504446] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 [ 241.504453] Call Trace: [ 241.504465] dump_stack+0x100/0x184 [ 241.504482] lockdep_rcu_suspicious+0x153/0x15d [ 241.504499] qdisc_match_from_root+0x293/0x350 Fix this by passing the rtnl held lockdep condition down to hlist_for_each_entry_rcu() Reported-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/hashtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 78b6ea5fa8ba..f6c666730b8c 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -173,9 +173,9 @@ static inline void hash_del_rcu(struct hlist_node *node) * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_rcu(name, obj, member, key) \ +#define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ - member) + member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing -- cgit v1.2.3 From f1bfd71c8662f20d53e71ef4e18bfb0e5677c27f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 13:36:09 +0200 Subject: arch, net: remove the last csum_partial_copy() leftovers Most of the tree only uses and implements csum_partial_copy_nocheck, but the c6x and lib/checksum.c implement a csum_partial_copy that isn't used anywere except to define csum_partial_copy. Get rid of this pointless alias. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/asm-generic/checksum.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h index 5a80f8e54300..cd8b75aa770d 100644 --- a/include/asm-generic/checksum.h +++ b/include/asm-generic/checksum.h @@ -23,11 +23,9 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary */ -extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum); - #ifndef csum_partial_copy_nocheck -#define csum_partial_copy_nocheck(src, dst, len, sum) \ - csum_partial_copy((src), (dst), (len), (sum)) +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, + __wsum sum); #endif #ifndef ip_fast_csum -- cgit v1.2.3 From e812916d3278baf03aabf10044661fc3b2848823 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:00 +0300 Subject: linkmode: introduce linkmode_intersects() Add a new helper to find intersections between Ethtool link modes, linkmode_intersects(), similar to the other linkmode helpers. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index c664c27a29a0..f8397f300fcd 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -82,6 +82,12 @@ static inline int linkmode_equal(const unsigned long *src1, return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline int linkmode_intersects(const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_intersects(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + static inline int linkmode_subset(const unsigned long *src1, const unsigned long *src2) { -- cgit v1.2.3 From bdb5d8ec47611ca61e168349f233e1dd1ed063f4 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:01 +0300 Subject: qed, qede, qedf: convert link mode from u32 to ETHTOOL_LINK_MODE Currently qed driver already ran out of 32 bits to store link modes, and this doesn't allow to add and support more speeds. Convert custom link mode to generic Ethtool bitmap and definitions (convenient Phylink shorthands are used for elegance and readability). This allowed us to drop all conversions/mappings between the driver and Ethtool. This involves changes in qede and qedf as well, as they used definitions from shared "qed_if.h". Suggested-by: Andrew Lunn Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 47 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8a6e3ad436d1..f82db1b92d45 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -661,42 +661,6 @@ enum qed_protocol { QED_PROTOCOL_FCOE, }; -enum qed_link_mode_bits { - QED_LM_FIBRE_BIT = BIT(0), - QED_LM_Autoneg_BIT = BIT(1), - QED_LM_Asym_Pause_BIT = BIT(2), - QED_LM_Pause_BIT = BIT(3), - QED_LM_1000baseT_Full_BIT = BIT(4), - QED_LM_10000baseT_Full_BIT = BIT(5), - QED_LM_10000baseKR_Full_BIT = BIT(6), - QED_LM_20000baseKR2_Full_BIT = BIT(7), - QED_LM_25000baseKR_Full_BIT = BIT(8), - QED_LM_40000baseLR4_Full_BIT = BIT(9), - QED_LM_50000baseKR2_Full_BIT = BIT(10), - QED_LM_100000baseKR4_Full_BIT = BIT(11), - QED_LM_TP_BIT = BIT(12), - QED_LM_Backplane_BIT = BIT(13), - QED_LM_1000baseKX_Full_BIT = BIT(14), - QED_LM_10000baseKX4_Full_BIT = BIT(15), - QED_LM_10000baseR_FEC_BIT = BIT(16), - QED_LM_40000baseKR4_Full_BIT = BIT(17), - QED_LM_40000baseCR4_Full_BIT = BIT(18), - QED_LM_40000baseSR4_Full_BIT = BIT(19), - QED_LM_25000baseCR_Full_BIT = BIT(20), - QED_LM_25000baseSR_Full_BIT = BIT(21), - QED_LM_50000baseCR2_Full_BIT = BIT(22), - QED_LM_100000baseSR4_Full_BIT = BIT(23), - QED_LM_100000baseCR4_Full_BIT = BIT(24), - QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25), - QED_LM_50000baseSR2_Full_BIT = BIT(26), - QED_LM_1000baseX_Full_BIT = BIT(27), - QED_LM_10000baseCR_Full_BIT = BIT(28), - QED_LM_10000baseSR_Full_BIT = BIT(29), - QED_LM_10000baseLR_Full_BIT = BIT(30), - QED_LM_10000baseLRM_Full_BIT = BIT(31), - QED_LM_COUNT = 32 -}; - struct qed_link_params { bool link_up; @@ -708,7 +672,9 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) u32 override_flags; bool autoneg; - u32 adv_speeds; + + __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_speeds); + u32 forced_speed; #define QED_LINK_PAUSE_AUTONEG_ENABLE BIT(0) #define QED_LINK_PAUSE_RX_ENABLE BIT(1) @@ -726,10 +692,9 @@ struct qed_link_params { struct qed_link_output { bool link_up; - /* In QED_LM_* defs */ - u32 supported_caps; - u32 advertised_caps; - u32 lp_caps; + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_caps); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised_caps); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_caps); u32 speed; /* In Mb/s */ u8 duplex; /* In DUPLEX defs */ -- cgit v1.2.3 From 37237b5b7104f91b519133d7862d1b5169a3ba8e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:06 +0300 Subject: qed: reformat several structures a bit Prior to adding new fields and bitfields, reformat the related structures according to the Linux style (spaces to tabs, lowercase hex, indentation etc.). Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f82db1b92d45..dde48f206d0d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -662,51 +662,53 @@ enum qed_protocol { }; struct qed_link_params { - bool link_up; + bool link_up; -#define QED_LINK_OVERRIDE_SPEED_AUTONEG BIT(0) -#define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) -#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) -#define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) -#define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) -#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) - u32 override_flags; - bool autoneg; + u32 override_flags; +#define QED_LINK_OVERRIDE_SPEED_AUTONEG BIT(0) +#define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) +#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) +#define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) +#define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) +#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) + bool autoneg; __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_speeds); + u32 forced_speed; - u32 forced_speed; -#define QED_LINK_PAUSE_AUTONEG_ENABLE BIT(0) -#define QED_LINK_PAUSE_RX_ENABLE BIT(1) -#define QED_LINK_PAUSE_TX_ENABLE BIT(2) - u32 pause_config; -#define QED_LINK_LOOPBACK_NONE BIT(0) -#define QED_LINK_LOOPBACK_INT_PHY BIT(1) -#define QED_LINK_LOOPBACK_EXT_PHY BIT(2) -#define QED_LINK_LOOPBACK_EXT BIT(3) -#define QED_LINK_LOOPBACK_MAC BIT(4) - u32 loopback_mode; - struct qed_link_eee_params eee; + u32 pause_config; +#define QED_LINK_PAUSE_AUTONEG_ENABLE BIT(0) +#define QED_LINK_PAUSE_RX_ENABLE BIT(1) +#define QED_LINK_PAUSE_TX_ENABLE BIT(2) + + u32 loopback_mode; +#define QED_LINK_LOOPBACK_NONE BIT(0) +#define QED_LINK_LOOPBACK_INT_PHY BIT(1) +#define QED_LINK_LOOPBACK_EXT_PHY BIT(2) +#define QED_LINK_LOOPBACK_EXT BIT(3) +#define QED_LINK_LOOPBACK_MAC BIT(4) + + struct qed_link_eee_params eee; }; struct qed_link_output { - bool link_up; + bool link_up; __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_caps); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised_caps); __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_caps); - u32 speed; /* In Mb/s */ - u8 duplex; /* In DUPLEX defs */ - u8 port; /* In PORT defs */ - bool autoneg; - u32 pause_config; + u32 speed; /* In Mb/s */ + u8 duplex; /* In DUPLEX defs */ + u8 port; /* In PORT defs */ + bool autoneg; + u32 pause_config; /* EEE - capability & param */ - bool eee_supported; - bool eee_active; - u8 sup_caps; - struct qed_link_eee_params eee; + bool eee_supported; + bool eee_active; + u8 sup_caps; + struct qed_link_eee_params eee; }; struct qed_probe_params { -- cgit v1.2.3 From ae7e69379fd5a87141fd8c7f2efab8e73f2a9f7e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:07 +0300 Subject: qed: add support for Forward Error Correction Add all necessary routines for reading supported FEC modes from NVM and querying FEC control to the MFW (if the running version supports it). Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dde48f206d0d..f0b4cdc79299 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -661,6 +661,14 @@ enum qed_protocol { QED_PROTOCOL_FCOE, }; +enum qed_fec_mode { + QED_FEC_MODE_NONE = BIT(0), + QED_FEC_MODE_FIRECODE = BIT(1), + QED_FEC_MODE_RS = BIT(2), + QED_FEC_MODE_AUTO = BIT(3), + QED_FEC_MODE_UNSUPPORTED = BIT(4), +}; + struct qed_link_params { bool link_up; @@ -671,6 +679,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) #define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) #define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) +#define QED_LINK_OVERRIDE_FEC_CONFIG BIT(6) bool autoneg; __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_speeds); @@ -689,6 +698,7 @@ struct qed_link_params { #define QED_LINK_LOOPBACK_MAC BIT(4) struct qed_link_eee_params eee; + u32 fec; }; struct qed_link_output { @@ -709,6 +719,9 @@ struct qed_link_output { bool eee_active; u8 sup_caps; struct qed_link_eee_params eee; + + u32 sup_fec; + u32 active_fec; }; struct qed_probe_params { -- cgit v1.2.3 From 98e675ec5a92a15f6f8ade41eda883cd39df5712 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:13 +0300 Subject: qed: add missing loopback modes These modes are relevant only for several boards, but may be reported by MFW as well as the others. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f0b4cdc79299..2e780159a5fb 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -696,6 +696,11 @@ struct qed_link_params { #define QED_LINK_LOOPBACK_EXT_PHY BIT(2) #define QED_LINK_LOOPBACK_EXT BIT(3) #define QED_LINK_LOOPBACK_MAC BIT(4) +#define QED_LINK_LOOPBACK_CNIG_AH_ONLY_0123 BIT(5) +#define QED_LINK_LOOPBACK_CNIG_AH_ONLY_2301 BIT(6) +#define QED_LINK_LOOPBACK_PCS_AH_ONLY BIT(7) +#define QED_LINK_LOOPBACK_REVERSE_MAC_AH_ONLY BIT(8) +#define QED_LINK_LOOPBACK_INT_PHY_FEA_AH_ONLY BIT(9) struct qed_link_eee_params eee; u32 fec; -- cgit v1.2.3 From 99785a87fc7d27207c7dca0f0fe04386f1981690 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 20 Jul 2020 21:08:15 +0300 Subject: qed: add support for the extended speed and FEC modes Add all necessary code (NVM parsing, MFW and Ethtool reports etc.) to support extended speed and FEC modes. These new modes are supported by the new boards revisions and newer MFW versions. Misc: correct port type for MEDIA_KR. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 2e780159a5fb..a5c6854343e6 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -594,6 +594,7 @@ enum qed_hw_err_type { enum qed_dev_type { QED_DEV_TYPE_BB, QED_DEV_TYPE_AH, + QED_DEV_TYPE_E5, }; struct qed_dev_info { -- cgit v1.2.3 From bc4f0548f683a3d53359cef15f088d2d5bb4bc39 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:33:58 -0700 Subject: bpf: Compute bpf_skc_to_*() helper socket btf ids at build time Currently, socket types (struct tcp_sock, udp_sock, etc.) used by bpf_skc_to_*() helpers are computed when vmlinux_btf is first built in the kernel. Commit 5a2798ab32ba ("bpf: Add BTF_ID_LIST/BTF_ID/BTF_ID_UNUSED macros") implemented a mechanism to compute btf_ids at kernel build time which can simplify kernel implementation and reduce runtime overhead by removing in-kernel btf_id calculation. This patch did exactly this, removing in-kernel btf_id computation and utilizing build-time btf_id computation. If CONFIG_DEBUG_INFO_BTF is not defined, BTF_ID_LIST will define an array with size of 5, which is not enough for btf_sock_ids. So define its own static array if CONFIG_DEBUG_INFO_BTF is not defined. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163358.1393023-1-yhs@fb.com --- include/linux/bpf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index adb16bdc5f0a..1df1c0fd3f28 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1541,7 +1541,6 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map) struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); -void init_btf_sock_ids(struct btf *btf); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) @@ -1567,9 +1566,6 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) static inline void bpf_map_offload_map_free(struct bpf_map *map) { } -static inline void init_btf_sock_ids(struct btf *btf) -{ -} #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -- cgit v1.2.3 From 0f12e584b241285cf60a6227f3771fa444cfcf76 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:01 -0700 Subject: bpf: Add BTF_ID_LIST_GLOBAL in btf_ids.h Existing BTF_ID_LIST used a local static variable to store btf_ids. This patch provided a new macro BTF_ID_LIST_GLOBAL to store btf_ids in a global variable which can be shared among multiple files. The existing BTF_ID_LIST is still retained. Two reasons. First, BTF_ID_LIST is also used to build btf_ids for helper arguments which typically is an array of 5. Since typically different helpers have different signature, it makes little sense to share them. Second, some current computed btf_ids are indeed local. If later those btf_ids are shared between different files, they can use BTF_ID_LIST_GLOBAL then. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20200720163401.1393159-1-yhs@fb.com --- include/linux/btf_ids.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 1cdb56950ffe..77ab45baa095 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -57,17 +57,20 @@ asm( \ * .zero 4 * */ -#define __BTF_ID_LIST(name) \ +#define __BTF_ID_LIST(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -".local " #name "; \n" \ +"." #scope " " #name "; \n" \ #name ":; \n" \ ".popsection; \n"); \ #define BTF_ID_LIST(name) \ -__BTF_ID_LIST(name) \ +__BTF_ID_LIST(name, local) \ extern u32 name[]; +#define BTF_ID_LIST_GLOBAL(name) \ +__BTF_ID_LIST(name, globl) + /* * The BTF_ID_UNUSED macro defines 4 zero bytes. * It's used when we want to define 'unused' entry @@ -90,6 +93,7 @@ asm( \ #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED +#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From fce557bcef119a1bc5ab3cb02678cf454bcaf424 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:02 -0700 Subject: bpf: Make btf_sock_ids global tcp and udp bpf_iter can reuse some socket ids in btf_sock_ids, so make it global. I put the extern definition in btf_ids.h as a central place so it can be easily discovered by developers. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163402.1393427-1-yhs@fb.com --- include/linux/btf_ids.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 77ab45baa095..4867d549e3c1 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -97,4 +97,34 @@ asm( \ #endif /* CONFIG_DEBUG_INFO_BTF */ +#ifdef CONFIG_NET +/* Define a list of socket types which can be the argument for + * skc_to_*_sock() helpers. All these sockets should have + * sock_common as the first argument in its memory layout. + */ +#define BTF_SOCK_TYPE_xxx \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) + +enum { +#define BTF_SOCK_TYPE(name, str) name, +BTF_SOCK_TYPE_xxx +#undef BTF_SOCK_TYPE +MAX_BTF_SOCK_TYPE, +}; + +extern u32 btf_sock_ids[]; +#endif + #endif -- cgit v1.2.3 From 951cf368bcb11d6f817709660cf5cd914072c36f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:03 -0700 Subject: bpf: net: Use precomputed btf_id for bpf iterators One additional field btf_id is added to struct bpf_ctx_arg_aux to store the precomputed btf_ids. The btf_id is computed at build time with BTF_ID_LIST or BTF_ID_LIST_GLOBAL macro definitions. All existing bpf iterators are changed to used pre-compute btf_ids. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163403.1393551-1-yhs@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1df1c0fd3f28..bae557ff2da8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -668,6 +668,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + u32 btf_id; }; struct bpf_prog_aux { -- cgit v1.2.3 From 1571e700fd610c39e8b50b0110b1ee9badb2fe6a Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Jul 2020 12:04:36 +0100 Subject: net: phylink: in-band pause mode advertisement update for PCS Re-code the pause in-band advertisement update in light of the addition of PCS support, so that we perform the minimum required; only the PCS configuration function needs to be called in this case, followed by the request to trigger a restart of negotiation if the programmed advertisement changed. We need to change the pcs_config() signature to pass whether resolved pause should be passed to the MAC for setups such as mvneta and mvpp2 where doing so overrides the MAC manual flow controls. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index b32b8b45421b..d9913d8e6b91 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -286,7 +286,8 @@ struct phylink_pcs_ops { struct phylink_link_state *state); int (*pcs_config)(struct phylink_config *config, unsigned int mode, phy_interface_t interface, - const unsigned long *advertising); + const unsigned long *advertising, + bool permit_pause_to_mac); void (*pcs_an_restart)(struct phylink_config *config); void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, phy_interface_t interface, int speed, int duplex); @@ -317,9 +318,11 @@ void pcs_get_state(struct phylink_config *config, * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. * @interface: interface mode to be used * @advertising: adertisement ethtool link mode mask + * @permit_pause_to_mac: permit forwarding pause resolution to MAC * * Configure the PCS for the operating mode, the interface mode, and set - * the advertisement mask. + * the advertisement mask. @permit_pause_to_mac indicates whether the + * hardware may forward the pause mode resolution to the MAC. * * When operating in %MLO_AN_INBAND, inband should always be enabled, * otherwise inband should be disabled. -- cgit v1.2.3 From b7ad14c2fe2d4b2abee491e3adfa3d0123aa2d8c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Jul 2020 12:04:41 +0100 Subject: net: phylink: re-implement interface configuration with PCS With PCS support, how we implement interface reconfiguration (or other major reconfiguration) is not up to the job; we end up reconfiguring the PCS for an interface change while the link could potentially be up. In order to solve this, add two additional MAC methods for major configuration, one to prepare for the change, and one to finish the change. This allows mvneta and mvpp2 to shutdown what they require prior to the MAC and PCS configuration calls, and then restart as appropriate. This impacts ksettings_set(), which now needs to identify whether the change is a minor tweak to the advertisement masks or whether the interface mode has changed, and call the appropriate function for that update. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d9913d8e6b91..2f1315f32113 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -76,7 +76,9 @@ struct phylink_config { * struct phylink_mac_ops - MAC operations structure. * @validate: Validate and update the link configuration. * @mac_pcs_get_state: Read the current link state from the hardware. + * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. + * @mac_finish: finish a major reconfiguration of the interface. * @mac_an_restart: restart 802.3z BaseX autonegotiation. * @mac_link_down: take the link down. * @mac_link_up: allow the link to come up. @@ -89,8 +91,12 @@ struct phylink_mac_ops { struct phylink_link_state *state); void (*mac_pcs_get_state)(struct phylink_config *config, struct phylink_link_state *state); + int (*mac_prepare)(struct phylink_config *config, unsigned int mode, + phy_interface_t iface); void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); + int (*mac_finish)(struct phylink_config *config, unsigned int mode, + phy_interface_t iface); void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); @@ -145,6 +151,31 @@ void validate(struct phylink_config *config, unsigned long *supported, void mac_pcs_get_state(struct phylink_config *config, struct phylink_link_state *state); +/** + * mac_prepare() - prepare to change the PHY interface mode + * @config: a pointer to a &struct phylink_config. + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @iface: interface mode to switch to + * + * phylink will call this method at the beginning of a full initialisation + * of the link, which includes changing the interface mode or at initial + * startup time. It may be called for the current mode. The MAC driver + * should perform whatever actions are required, e.g. disabling the + * Serdes PHY. + * + * This will be the first call in the sequence: + * - mac_prepare() + * - mac_config() + * - pcs_config() + * - possible pcs_an_restart() + * - mac_finish() + * + * Returns zero on success, or negative errno on failure which will be + * reported to the kernel log. + */ +int mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t iface); + /** * mac_config() - configure the MAC for the selected mode and state * @config: a pointer to a &struct phylink_config. @@ -220,6 +251,23 @@ void mac_pcs_get_state(struct phylink_config *config, void mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); +/** + * mac_finish() - finish a to change the PHY interface mode + * @config: a pointer to a &struct phylink_config. + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @iface: interface mode to switch to + * + * phylink will call this if it called mac_prepare() to allow the MAC to + * complete any necessary steps after the MAC and PCS have been configured + * for the @mode and @iface. E.g. a MAC driver may wish to re-enable the + * Serdes PHY here if it was previously disabled by mac_prepare(). + * + * Returns zero on success, or negative errno on failure which will be + * reported to the kernel log. + */ +int mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t iface); + /** * mac_an_restart() - restart 802.3z BaseX autonegotiation * @config: a pointer to a &struct phylink_config. -- cgit v1.2.3 From 7137e18f6f889a67046d5004e1690a32d7d2108d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Jul 2020 12:04:46 +0100 Subject: net: phylink: add struct phylink_pcs Add a way for MAC PCS to have private data while keeping independence from struct phylink_config, which is used for the MAC itself. We need this independence as we will have stand-alone code for PCS that is independent of the MAC. Introduce struct phylink_pcs, which is designed to be embedded in a driver private data structure. This structure does not include a mdio_device as there are PCS implementations such as the Marvell DSA and network drivers where this is not necessary. Reviewed-by: Ioana Ciornei Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2f1315f32113..057f78263a46 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -321,6 +321,21 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, int speed, int duplex, bool tx_pause, bool rx_pause); #endif +struct phylink_pcs_ops; + +/** + * struct phylink_pcs - PHYLINK PCS instance + * @ops: a pointer to the &struct phylink_pcs_ops structure + * @poll: poll the PCS for link changes + * + * This structure is designed to be embedded within the PCS private data, + * and will be passed between phylink and the PCS. + */ +struct phylink_pcs { + const struct phylink_pcs_ops *ops; + bool poll; +}; + /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_get_state: read the current MAC PCS link state from the hardware. @@ -330,21 +345,21 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, * (where necessary). */ struct phylink_pcs_ops { - void (*pcs_get_state)(struct phylink_config *config, + void (*pcs_get_state)(struct phylink_pcs *pcs, struct phylink_link_state *state); - int (*pcs_config)(struct phylink_config *config, unsigned int mode, + int (*pcs_config)(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac); - void (*pcs_an_restart)(struct phylink_config *config); - void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, + void (*pcs_an_restart)(struct phylink_pcs *pcs); + void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); }; #if 0 /* For kernel-doc purposes only. */ /** * pcs_get_state() - Read the current inband link state from the hardware - * @config: a pointer to a &struct phylink_config. + * @pcs: a pointer to a &struct phylink_pcs. * @state: a pointer to a &struct phylink_link_state. * * Read the current inband link state from the MAC PCS, reporting the @@ -357,12 +372,12 @@ struct phylink_pcs_ops { * When present, this overrides mac_pcs_get_state() in &struct * phylink_mac_ops. */ -void pcs_get_state(struct phylink_config *config, +void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); /** * pcs_config() - Configure the PCS mode and advertisement - * @config: a pointer to a &struct phylink_config. + * @pcs: a pointer to a &struct phylink_pcs. * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. * @interface: interface mode to be used * @advertising: adertisement ethtool link mode mask @@ -382,21 +397,21 @@ void pcs_get_state(struct phylink_config *config, * * For most 10GBASE-R, there is no advertisement. */ -int (*pcs_config)(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, const unsigned long *advertising); +int pcs_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, const unsigned long *advertising); /** * pcs_an_restart() - restart 802.3z BaseX autonegotiation - * @config: a pointer to a &struct phylink_config. + * @pcs: a pointer to a &struct phylink_pcs. * * When PCS ops are present, this overrides mac_an_restart() in &struct * phylink_mac_ops. */ -void (*pcs_an_restart)(struct phylink_config *config); +void pcs_an_restart(struct phylink_pcs *pcs); /** * pcs_link_up() - program the PCS for the resolved link configuration - * @config: a pointer to a &struct phylink_config. + * @pcs: a pointer to a &struct phylink_pcs. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -407,14 +422,14 @@ void (*pcs_an_restart)(struct phylink_config *config); * mode without in-band AN needs to be manually configured for the link * and duplex setting. Otherwise, this should be a no-op. */ -void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, int speed, int duplex); +void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex); #endif struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); -void phylink_add_pcs(struct phylink *, const struct phylink_pcs_ops *ops); +void phylink_set_pcs(struct phylink *, struct phylink_pcs *pcs); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From 93eaceb0fcf87b7f70924f9da3ec27e3c73be53d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Jul 2020 12:04:51 +0100 Subject: net: phylink: add interface to configure clause 22 PCS PHY Add an interface to configure the advertisement for a clause 22 PCS PHY, and set the AN enable flag in the BMCR appropriately. Reviewed-by: Ioana Ciornei Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 057f78263a46..1aad2aea4610 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -478,6 +478,9 @@ void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, phy_interface_t interface, const unsigned long *advertising); +int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, -- cgit v1.2.3 From 336ce1c93293e1e606fbc557587b1a1f8630cd5c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 21 Jul 2020 19:53:53 +0300 Subject: devlink: Add comment for devlink instance lock Add comment to describe the purpose of devlink instance lock. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 913e8679ae35..19d990c8edcc 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -40,7 +40,9 @@ struct devlink { struct xarray snapshot_ids; struct device *dev; possible_net_t _net; - struct mutex lock; + struct mutex lock; /* Serializes access to devlink instance specific objects such as + * port, sb, dpipe, resource, params, region, traps and more. + */ u8 reload_failed:1, reload_enabled:1, registered:1; -- cgit v1.2.3 From 35dfb013149f74c2be1ff9c78f14e6a3cd1539d1 Mon Sep 17 00:00:00 2001 From: Andrew Sy Kim Date: Wed, 8 Jul 2020 12:16:38 -0400 Subject: ipvs: queue delayed work to expire no destination connections if expire_nodest_conn=1 When expire_nodest_conn=1 and a destination is deleted, IPVS does not expire the existing connections until the next matching incoming packet. If there are many connection entries from a single client to a single destination, many packets may get dropped before all the connections are expired (more likely with lots of UDP traffic). An optimization can be made where upon deletion of a destination, IPVS queues up delayed work to immediately expire any connections with a deleted destination. This ensures any reused source ports from a client (within the IPVS timeouts) are scheduled to new real servers instead of silently dropped. Signed-off-by: Andrew Sy Kim Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 011f407b76fe..9a59a33787cb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -14,6 +14,7 @@ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ #include /* for struct refcount_t */ +#include #include #include @@ -886,6 +887,8 @@ struct netns_ipvs { atomic_t conn_out_counter; #ifdef CONFIG_SYSCTL + /* delayed work for expiring no dest connections */ + struct delayed_work expire_nodest_conn_work; /* 1/rate drop and drop-entry variables */ struct delayed_work defense_work; /* Work handler */ int drop_rate; @@ -1051,6 +1054,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return ipvs->sysctl_conn_reuse_mode; } +static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_expire_nodest_conn; +} + static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) { return ipvs->sysctl_schedule_icmp; @@ -1138,6 +1146,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return 0; +} + static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) { return 0; @@ -1507,6 +1520,22 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } #endif +#ifdef CONFIG_SYSCTL +/* Enqueue delayed work for expiring no dest connections + * Only run when sysctl_expire_nodest=1 + */ +static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) +{ + if (sysctl_expire_nodest_conn(ipvs)) + queue_delayed_work(system_long_wq, + &ipvs->expire_nodest_conn_work, 1); +} + +void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs); +#else +static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) {} +#endif + #define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \ IP_VS_CONN_F_FWD_MASK) -- cgit v1.2.3 From 4787dd582dbde0b7f29eb3dbe59df3da1b350925 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Fri, 17 Jul 2020 08:05:12 +0530 Subject: bareudp: Reverted support to enable & disable rx metadata collection The commit fe80536acf83 ("bareudp: Added attribute to enable & disable rx metadata collection") breaks the the original(5.7) default behavior of bareudp module to collect RX metadadata at the receive. It was added to avoid the crash at the kernel neighbour subsytem when packet with metadata from bareudp is processed. But it is no more needed as the commit 394de110a733 ("net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb") solves this crash. Fixes: fe80536acf83 ("bareudp: Added attribute to enable & disable rx metadata collection") Signed-off-by: Martin Varghese Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/bareudp.h | 1 - include/uapi/linux/if_link.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/bareudp.h b/include/net/bareudp.h index 3dd5f9a8d01c..dc65a0d71d9b 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -12,7 +12,6 @@ struct bareudp_conf { __be16 port; u16 sport_min; bool multi_proto_mode; - bool rx_collect_metadata; }; struct net_device *bareudp_dev_create(struct net *net, const char *name, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 26842ffd0501..af8f31987526 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -601,7 +601,6 @@ enum { IFLA_BAREUDP_ETHERTYPE, IFLA_BAREUDP_SRCPORT_MIN, IFLA_BAREUDP_MULTIPROTO_MODE, - IFLA_BAREUDP_RX_COLLECT_METADATA, __IFLA_BAREUDP_MAX }; -- cgit v1.2.3 From 2cf2f4f546f1dea54b63302a7eb28d1fe15f1e28 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:31 +0300 Subject: qed: reformat "qed_chain.h" a bit Reformat structs and macros definitions a bit prior to making functional changes. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 126 ++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 7071dc92b4e2..087073517c09 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -26,9 +26,9 @@ enum qed_chain_mode { }; enum qed_chain_use_mode { - QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */ - QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */ - QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ + QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */ + QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */ + QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ }; enum qed_chain_cnt_type { @@ -40,84 +40,86 @@ enum qed_chain_cnt_type { }; struct qed_chain_next { - struct regpair next_phys; - void *next_virt; + struct regpair next_phys; + void *next_virt; }; struct qed_chain_pbl_u16 { - u16 prod_page_idx; - u16 cons_page_idx; + u16 prod_page_idx; + u16 cons_page_idx; }; struct qed_chain_pbl_u32 { - u32 prod_page_idx; - u32 cons_page_idx; + u32 prod_page_idx; + u32 cons_page_idx; }; struct qed_chain_ext_pbl { - dma_addr_t p_pbl_phys; - void *p_pbl_virt; + dma_addr_t p_pbl_phys; + void *p_pbl_virt; }; struct qed_chain_u16 { /* Cyclic index of next element to produce/consme */ - u16 prod_idx; - u16 cons_idx; + u16 prod_idx; + u16 cons_idx; }; struct qed_chain_u32 { /* Cyclic index of next element to produce/consme */ - u32 prod_idx; - u32 cons_idx; + u32 prod_idx; + u32 cons_idx; }; struct addr_tbl_entry { - void *virt_addr; - dma_addr_t dma_map; + void *virt_addr; + dma_addr_t dma_map; }; struct qed_chain { - /* fastpath portion of the chain - required for commands such + /* Fastpath portion of the chain - required for commands such * as produce / consume. */ + /* Point to next element to produce/consume */ - void *p_prod_elem; - void *p_cons_elem; + void *p_prod_elem; + void *p_cons_elem; /* Fastpath portions of the PBL [if exists] */ + struct { /* Table for keeping the virtual and physical addresses of the * chain pages, respectively to the physical addresses * in the pbl table. */ - struct addr_tbl_entry *pp_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl; union { - struct qed_chain_pbl_u16 u16; - struct qed_chain_pbl_u32 u32; - } c; - } pbl; + struct qed_chain_pbl_u16 u16; + struct qed_chain_pbl_u32 u32; + } c; + } pbl; union { - struct qed_chain_u16 chain16; - struct qed_chain_u32 chain32; - } u; + struct qed_chain_u16 chain16; + struct qed_chain_u32 chain32; + } u; /* Capacity counts only usable elements */ - u32 capacity; - u32 page_cnt; + u32 capacity; + u32 page_cnt; - enum qed_chain_mode mode; + enum qed_chain_mode mode; /* Elements information for fast calculations */ - u16 elem_per_page; - u16 elem_per_page_mask; - u16 elem_size; - u16 next_page_mask; - u16 usable_per_page; - u8 elem_unusable; + u16 elem_per_page; + u16 elem_per_page_mask; + u16 elem_size; + u16 next_page_mask; + u16 usable_per_page; + u8 elem_unusable; - u8 cnt_type; + u8 cnt_type; /* Slowpath of the chain - required for initialization and destruction, * but isn't involved in regular functionality. @@ -125,43 +127,47 @@ struct qed_chain { /* Base address of a pre-allocated buffer for pbl */ struct { - dma_addr_t p_phys_table; - void *p_virt_table; - } pbl_sp; + dma_addr_t p_phys_table; + void *p_virt_table; + } pbl_sp; /* Address of first page of the chain - the address is required * for fastpath operation [consume/produce] but only for the SINGLE * flavour which isn't considered fastpath [== SPQ]. */ - void *p_virt_addr; - dma_addr_t p_phys_addr; + void *p_virt_addr; + dma_addr_t p_phys_addr; /* Total number of elements [for entire chain] */ - u32 size; + u32 size; - u8 intended_use; + u8 intended_use; - bool b_external_pbl; + bool b_external_pbl; }; -#define QED_CHAIN_PBL_ENTRY_SIZE (8) -#define QED_CHAIN_PAGE_SIZE (0x1000) -#define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size)) +#define QED_CHAIN_PBL_ENTRY_SIZE 8 +#define QED_CHAIN_PAGE_SIZE 0x1000 + +#define ELEMS_PER_PAGE(elem_size) \ + (QED_CHAIN_PAGE_SIZE / (elem_size)) -#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ - (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \ - (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \ - (elem_size))) : 0) +#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ + (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \ + (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / (elem_size))) : \ + 0) -#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ - ((u32)(ELEMS_PER_PAGE(elem_size) - \ - UNUSABLE_ELEMS_PER_PAGE(elem_size, mode))) +#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ + ((u32)(ELEMS_PER_PAGE(elem_size) - \ + UNUSABLE_ELEMS_PER_PAGE((elem_size), (mode)))) -#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ - DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode)) +#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ + DIV_ROUND_UP((elem_cnt), USABLE_ELEMS_PER_PAGE((elem_size), (mode))) -#define is_chain_u16(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16) -#define is_chain_u32(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32) +#define is_chain_u16(p) \ + ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16) +#define is_chain_u32(p) \ + ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32) /* Accessors */ static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain) -- cgit v1.2.3 From 9b6ee3cf95d322ab02e9927f5b08ebc870ca9f1f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:35 +0300 Subject: qed: sanitize PBL chains allocation PBL chain elements are actually DMA addresses stored in __le64, but currently their size is hardcoded to 8, and DMA addresses are assigned via cast to variable-sized dma_addr_t without any bitwise conversions. Change the type of pbl_virt array to match the actual one, add a new field to store the size of allocated DMA memory and sanitize elements assignment. Misc: give more logic names to the members of qed_chain::pbl_sp embedded struct. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 087073517c09..265e0b671a5c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -127,8 +127,9 @@ struct qed_chain { /* Base address of a pre-allocated buffer for pbl */ struct { - dma_addr_t p_phys_table; - void *p_virt_table; + __le64 *table_virt; + dma_addr_t table_phys; + size_t table_size; } pbl_sp; /* Address of first page of the chain - the address is required @@ -146,7 +147,6 @@ struct qed_chain { bool b_external_pbl; }; -#define QED_CHAIN_PBL_ENTRY_SIZE 8 #define QED_CHAIN_PAGE_SIZE 0x1000 #define ELEMS_PER_PAGE(elem_size) \ @@ -236,7 +236,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) { - return p_chain->pbl_sp.p_phys_table; + return p_chain->pbl_sp.table_phys; } /** @@ -527,8 +527,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->capacity = p_chain->usable_per_page * page_cnt; p_chain->size = p_chain->elem_per_page * page_cnt; - p_chain->pbl_sp.p_phys_table = 0; - p_chain->pbl_sp.p_virt_table = NULL; + p_chain->pbl_sp.table_phys = 0; + p_chain->pbl_sp.table_virt = NULL; p_chain->pbl.pp_addr_tbl = NULL; } @@ -569,8 +569,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, dma_addr_t p_phys_pbl, struct addr_tbl_entry *pp_addr_tbl) { - p_chain->pbl_sp.p_phys_table = p_phys_pbl; - p_chain->pbl_sp.p_virt_table = p_virt_pbl; + p_chain->pbl_sp.table_phys = p_phys_pbl; + p_chain->pbl_sp.table_virt = p_virt_pbl; p_chain->pbl.pp_addr_tbl = pp_addr_tbl; } -- cgit v1.2.3 From 5e776d8016119e13c27fbb6e87c9e1fd6f8b2a75 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:36 +0300 Subject: qed: move chain initialization inlines next to allocation functions qed_chain_init*() are used in one file/place on "cold" path only, so they can be uninlined and moved next to the call sites. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 112 ------------------------------------------ 1 file changed, 112 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 265e0b671a5c..a0d83095dc73 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -490,118 +490,6 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) } } -/** - * @brief qed_chain_init - Initalizes a basic chain struct - * - * @param p_chain - * @param p_virt_addr - * @param p_phys_addr physical address of allocated buffer's beginning - * @param page_cnt number of pages in the allocated buffer - * @param elem_size size of each element in the chain - * @param intended_use - * @param mode - */ -static inline void qed_chain_init_params(struct qed_chain *p_chain, - u32 page_cnt, - u8 elem_size, - enum qed_chain_use_mode intended_use, - enum qed_chain_mode mode, - enum qed_chain_cnt_type cnt_type) -{ - /* chain fixed parameters */ - p_chain->p_virt_addr = NULL; - p_chain->p_phys_addr = 0; - p_chain->elem_size = elem_size; - p_chain->intended_use = (u8)intended_use; - p_chain->mode = mode; - p_chain->cnt_type = (u8)cnt_type; - - p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); - p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; - p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->next_page_mask = (p_chain->usable_per_page & - p_chain->elem_per_page_mask); - - p_chain->page_cnt = page_cnt; - p_chain->capacity = p_chain->usable_per_page * page_cnt; - p_chain->size = p_chain->elem_per_page * page_cnt; - - p_chain->pbl_sp.table_phys = 0; - p_chain->pbl_sp.table_virt = NULL; - p_chain->pbl.pp_addr_tbl = NULL; -} - -/** - * @brief qed_chain_init_mem - - * - * Initalizes a basic chain struct with its chain buffers - * - * @param p_chain - * @param p_virt_addr virtual address of allocated buffer's beginning - * @param p_phys_addr physical address of allocated buffer's beginning - * - */ -static inline void qed_chain_init_mem(struct qed_chain *p_chain, - void *p_virt_addr, dma_addr_t p_phys_addr) -{ - p_chain->p_virt_addr = p_virt_addr; - p_chain->p_phys_addr = p_phys_addr; -} - -/** - * @brief qed_chain_init_pbl_mem - - * - * Initalizes a basic chain struct with its pbl buffers - * - * @param p_chain - * @param p_virt_pbl pointer to a pre allocated side table which will hold - * virtual page addresses. - * @param p_phys_pbl pointer to a pre-allocated side table which will hold - * physical page addresses. - * @param pp_virt_addr_tbl - * pointer to a pre-allocated side table which will hold - * the virtual addresses of the chain pages. - * - */ -static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, - void *p_virt_pbl, - dma_addr_t p_phys_pbl, - struct addr_tbl_entry *pp_addr_tbl) -{ - p_chain->pbl_sp.table_phys = p_phys_pbl; - p_chain->pbl_sp.table_virt = p_virt_pbl; - p_chain->pbl.pp_addr_tbl = pp_addr_tbl; -} - -/** - * @brief qed_chain_init_next_ptr_elem - - * - * Initalizes a next pointer element - * - * @param p_chain - * @param p_virt_curr virtual address of a chain page of which the next - * pointer element is initialized - * @param p_virt_next virtual address of the next chain page - * @param p_phys_next physical address of the next chain page - * - */ -static inline void -qed_chain_init_next_ptr_elem(struct qed_chain *p_chain, - void *p_virt_curr, - void *p_virt_next, dma_addr_t p_phys_next) -{ - struct qed_chain_next *p_next; - u32 size; - - size = p_chain->elem_size * p_chain->usable_per_page; - p_next = (struct qed_chain_next *)((u8 *)p_virt_curr + size); - - DMA_REGPAIR_LE(p_next->next_phys, p_phys_next); - - p_next->next_virt = p_virt_next; -} - /** * @brief qed_chain_get_last_elem - * -- cgit v1.2.3 From b6db3f71c976ea92361dbc7ebfb65db666ac9f02 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:38 +0300 Subject: qed: simplify chain allocation with init params struct To simplify qed_chain_alloc() prototype and call sites, introduce struct qed_chain_init_params to specify chain params, and pass a pointer to filled struct to the actual qed_chain_alloc() instead of a long list of separate arguments. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 21 ++++++++++++++------- include/linux/qed/qed_if.h | 9 ++------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index a0d83095dc73..f5cfee0934e5 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -54,11 +54,6 @@ struct qed_chain_pbl_u32 { u32 cons_page_idx; }; -struct qed_chain_ext_pbl { - dma_addr_t p_pbl_phys; - void *p_pbl_virt; -}; - struct qed_chain_u16 { /* Cyclic index of next element to produce/consme */ u16 prod_idx; @@ -119,7 +114,7 @@ struct qed_chain { u16 usable_per_page; u8 elem_unusable; - u8 cnt_type; + enum qed_chain_cnt_type cnt_type; /* Slowpath of the chain - required for initialization and destruction, * but isn't involved in regular functionality. @@ -142,11 +137,23 @@ struct qed_chain { /* Total number of elements [for entire chain] */ u32 size; - u8 intended_use; + enum qed_chain_use_mode intended_use; bool b_external_pbl; }; +struct qed_chain_init_params { + enum qed_chain_mode mode; + enum qed_chain_use_mode intended_use; + enum qed_chain_cnt_type cnt_type; + + u32 num_elems; + size_t elem_size; + + void *ext_pbl_virt; + dma_addr_t ext_pbl_phys; +}; + #define QED_CHAIN_PAGE_SIZE 0x1000 #define ELEMS_PER_PAGE(elem_size) \ diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index a5c6854343e6..cd6a5c7e56eb 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -948,13 +948,8 @@ struct qed_common_ops { u8 dp_level); int (*chain_alloc)(struct qed_dev *cdev, - enum qed_chain_use_mode intended_use, - enum qed_chain_mode mode, - enum qed_chain_cnt_type cnt_type, - u32 num_elems, - size_t elem_size, - struct qed_chain *p_chain, - struct qed_chain_ext_pbl *ext_pbl); + struct qed_chain *chain, + struct qed_chain_init_params *params); void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); -- cgit v1.2.3 From 155065866bc36f20061c55fd2ca287a466911b16 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:39 +0300 Subject: qed: add support for different page sizes for chains Extend current infrastructure to store chain page size in a struct and use it in all functions instead of fixed QED_CHAIN_PAGE_SIZE. Its value remains the default one, but can be overridden in qed_chain_init_params before chain allocation. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index f5cfee0934e5..8a96c361cc19 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -120,6 +121,8 @@ struct qed_chain { * but isn't involved in regular functionality. */ + u32 page_size; + /* Base address of a pre-allocated buffer for pbl */ struct { __le64 *table_virt; @@ -147,6 +150,7 @@ struct qed_chain_init_params { enum qed_chain_use_mode intended_use; enum qed_chain_cnt_type cnt_type; + u32 page_size; u32 num_elems; size_t elem_size; @@ -154,22 +158,23 @@ struct qed_chain_init_params { dma_addr_t ext_pbl_phys; }; -#define QED_CHAIN_PAGE_SIZE 0x1000 +#define QED_CHAIN_PAGE_SIZE SZ_4K -#define ELEMS_PER_PAGE(elem_size) \ - (QED_CHAIN_PAGE_SIZE / (elem_size)) +#define ELEMS_PER_PAGE(elem_size, page_size) \ + ((page_size) / (elem_size)) #define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \ (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / (elem_size))) : \ 0) -#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ - ((u32)(ELEMS_PER_PAGE(elem_size) - \ +#define USABLE_ELEMS_PER_PAGE(elem_size, page_size, mode) \ + ((u32)(ELEMS_PER_PAGE((elem_size), (page_size)) - \ UNUSABLE_ELEMS_PER_PAGE((elem_size), (mode)))) -#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ - DIV_ROUND_UP((elem_cnt), USABLE_ELEMS_PER_PAGE((elem_size), (mode))) +#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, page_size, mode) \ + DIV_ROUND_UP((elem_cnt), \ + USABLE_ELEMS_PER_PAGE((elem_size), (page_size), (mode))) #define is_chain_u16(p) \ ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16) @@ -604,7 +609,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) for (i = 0; i < page_cnt; i++) memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0, - QED_CHAIN_PAGE_SIZE); + p_chain->page_size); } #endif -- cgit v1.2.3 From f2aefd20b02d83b8565c867c38eedd88853967e9 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:40 +0300 Subject: qed: optimize common chain accessors Constify chain pointers and refactor qed_chain_get_elem_left{,u32}() a bit. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 60 ++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 8a96c361cc19..434479e2ab65 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -182,73 +182,79 @@ struct qed_chain_init_params { ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32) /* Accessors */ -static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain) + +static inline u16 qed_chain_get_prod_idx(const struct qed_chain *chain) +{ + return chain->u.chain16.prod_idx; +} + +static inline u16 qed_chain_get_cons_idx(const struct qed_chain *chain) { - return p_chain->u.chain16.prod_idx; + return chain->u.chain16.cons_idx; } -static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain) +static inline u32 qed_chain_get_prod_idx_u32(const struct qed_chain *chain) { - return p_chain->u.chain16.cons_idx; + return chain->u.chain32.prod_idx; } -static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) +static inline u32 qed_chain_get_cons_idx_u32(const struct qed_chain *chain) { - return p_chain->u.chain32.cons_idx; + return chain->u.chain32.cons_idx; } -static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) +static inline u16 qed_chain_get_elem_left(const struct qed_chain *chain) { - u16 elem_per_page = p_chain->elem_per_page; - u32 prod = p_chain->u.chain16.prod_idx; - u32 cons = p_chain->u.chain16.cons_idx; + u32 prod = qed_chain_get_prod_idx(chain); + u32 cons = qed_chain_get_cons_idx(chain); + u16 elem_per_page = chain->elem_per_page; u16 used; if (prod < cons) prod += (u32)U16_MAX + 1; used = (u16)(prod - cons); - if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= prod / elem_per_page - cons / elem_per_page; + if (chain->mode == QED_CHAIN_MODE_NEXT_PTR) + used -= (u16)(prod / elem_per_page - cons / elem_per_page); - return (u16)(p_chain->capacity - used); + return (u16)(chain->capacity - used); } -static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) +static inline u32 qed_chain_get_elem_left_u32(const struct qed_chain *chain) { - u16 elem_per_page = p_chain->elem_per_page; - u64 prod = p_chain->u.chain32.prod_idx; - u64 cons = p_chain->u.chain32.cons_idx; + u64 prod = qed_chain_get_prod_idx_u32(chain); + u64 cons = qed_chain_get_cons_idx_u32(chain); + u16 elem_per_page = chain->elem_per_page; u32 used; if (prod < cons) prod += (u64)U32_MAX + 1; used = (u32)(prod - cons); - if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) + if (chain->mode == QED_CHAIN_MODE_NEXT_PTR) used -= (u32)(prod / elem_per_page - cons / elem_per_page); - return p_chain->capacity - used; + return chain->capacity - used; } -static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain) +static inline u16 qed_chain_get_usable_per_page(const struct qed_chain *chain) { - return p_chain->usable_per_page; + return chain->usable_per_page; } -static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) +static inline u8 qed_chain_get_unusable_per_page(const struct qed_chain *chain) { - return p_chain->elem_unusable; + return chain->elem_unusable; } -static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) +static inline u32 qed_chain_get_page_cnt(const struct qed_chain *chain) { - return p_chain->page_cnt; + return chain->page_cnt; } -static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) +static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain) { - return p_chain->pbl_sp.table_phys; + return chain->pbl_sp.table_phys; } /** -- cgit v1.2.3 From be0cec6ffd686364bbba2796bbe5eee2fad18181 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 23 Jul 2020 01:10:41 +0300 Subject: qed: introduce qed_chain_get_elem_used{,u32}() Add reverse-variants of qed_chain_get_elem_left{,u32}() to be able to know current chain occupation. They will be used in the upcoming qede XDP_REDIRECT code. They share most of the logics with the mentioned ones, so were reused to collapse the latters. Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 434479e2ab65..4d58dc8943f0 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -203,7 +203,7 @@ static inline u32 qed_chain_get_cons_idx_u32(const struct qed_chain *chain) return chain->u.chain32.cons_idx; } -static inline u16 qed_chain_get_elem_left(const struct qed_chain *chain) +static inline u16 qed_chain_get_elem_used(const struct qed_chain *chain) { u32 prod = qed_chain_get_prod_idx(chain); u32 cons = qed_chain_get_cons_idx(chain); @@ -217,10 +217,15 @@ static inline u16 qed_chain_get_elem_left(const struct qed_chain *chain) if (chain->mode == QED_CHAIN_MODE_NEXT_PTR) used -= (u16)(prod / elem_per_page - cons / elem_per_page); - return (u16)(chain->capacity - used); + return used; } -static inline u32 qed_chain_get_elem_left_u32(const struct qed_chain *chain) +static inline u16 qed_chain_get_elem_left(const struct qed_chain *chain) +{ + return (u16)(chain->capacity - qed_chain_get_elem_used(chain)); +} + +static inline u32 qed_chain_get_elem_used_u32(const struct qed_chain *chain) { u64 prod = qed_chain_get_prod_idx_u32(chain); u64 cons = qed_chain_get_cons_idx_u32(chain); @@ -234,7 +239,12 @@ static inline u32 qed_chain_get_elem_left_u32(const struct qed_chain *chain) if (chain->mode == QED_CHAIN_MODE_NEXT_PTR) used -= (u32)(prod / elem_per_page - cons / elem_per_page); - return chain->capacity - used; + return used; +} + +static inline u32 qed_chain_get_elem_left_u32(const struct qed_chain *chain) +{ + return chain->capacity - qed_chain_get_elem_used_u32(chain); } static inline u16 qed_chain_get_usable_per_page(const struct qed_chain *chain) -- cgit v1.2.3 From 5df5661a1387e829c901d009cdd1fccc376cdb74 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 23 Jul 2020 01:43:12 +0300 Subject: net: dsa: stop overriding master's ndo_get_phys_port_name The purpose of this override is to give the user an indication of what the number of the CPU port is (in DSA, the CPU port is a hardware implementation detail and not a network interface capable of traffic). However, it has always failed (by design) at providing this information to the user in a reliable fashion. Prior to commit 3369afba1e46 ("net: Call into DSA netdevice_ops wrappers"), the behavior was to only override this callback if it was not provided by the DSA master. That was its first failure: if the DSA master itself was a DSA port or a switchdev, then the user would not see the number of the CPU port in /sys/class/net/eth0/phys_port_name, but the number of the DSA master port within its respective physical switch. But that was actually ok in a way. The commit mentioned above changed that behavior, and now overrides the master's ndo_get_phys_port_name unconditionally. That comes with problems of its own, which are worse in a way. The idea is that it's typical for switchdev users to have udev rules for consistent interface naming. These are based, among other things, on the phys_port_name attribute. If we let the DSA switch at the bottom to start randomly overriding ndo_get_phys_port_name with its own CPU port, we basically lose any predictability in interface naming, or even uniqueness, for that matter. So, there are reasons to let DSA override the master's callback (to provide a consistent interface, a number which has a clear meaning and must not be interpreted according to context), and there are reasons to not let DSA override it (it breaks udev matching for the DSA master). But, there is an alternative method for users to retrieve the number of the CPU port of each DSA switch in the system: $ devlink port pci/0000:00:00.5/0: type eth netdev swp0 flavour physical port 0 pci/0000:00:00.5/2: type eth netdev swp2 flavour physical port 2 pci/0000:00:00.5/4: type notset flavour cpu port 4 spi/spi2.0/0: type eth netdev sw0p0 flavour physical port 0 spi/spi2.0/1: type eth netdev sw0p1 flavour physical port 1 spi/spi2.0/2: type eth netdev sw0p2 flavour physical port 2 spi/spi2.0/4: type notset flavour cpu port 4 spi/spi2.1/0: type eth netdev sw1p0 flavour physical port 0 spi/spi2.1/1: type eth netdev sw1p1 flavour physical port 1 spi/spi2.1/2: type eth netdev sw1p2 flavour physical port 2 spi/spi2.1/3: type eth netdev sw1p3 flavour physical port 3 spi/spi2.1/4: type notset flavour cpu port 4 So remove this duplicated, unreliable and troublesome method. From this patch on, the phys_port_name attribute of the DSA master will only contain information about itself (if at all). If the users need reliable information about the CPU port they're probably using devlink anyway. Signed-off-by: Vladimir Oltean Acked-by: florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index f1b63d06d132..75c8fac82017 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -94,8 +94,6 @@ struct dsa_device_ops { struct dsa_netdevice_ops { int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, - size_t len); }; #define DSA_TAG_DRIVER_ALIAS "dsa_tag-" @@ -719,33 +717,12 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, return ops->ndo_do_ioctl(dev, ifr, cmd); } - -static inline int dsa_ndo_get_phys_port_name(struct net_device *dev, - char *name, size_t len) -{ - const struct dsa_netdevice_ops *ops; - int err; - - err = __dsa_netdevice_ops_check(dev); - if (err) - return err; - - ops = dev->dsa_ptr->netdev_ops; - - return ops->ndo_get_phys_port_name(dev, name, len); -} #else static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { return -EOPNOTSUPP; } - -static inline int dsa_ndo_get_phys_port_name(struct net_device *dev, - char *name, size_t len) -{ - return -EOPNOTSUPP; -} #endif void dsa_unregister_switch(struct dsa_switch *ds); -- cgit v1.2.3 From 0cb09aff9d49d92305c3969fc84b785117412968 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 23 Jul 2020 01:03:00 +0300 Subject: net/flow_dissector: add packet hash dissection Retreive a hash value from the SKB and store it in the dissector key for future matching. Signed-off-by: Ariel Levkovich Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/net/flow_dissector.h | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6a82d4a8229e..fa817a105517 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1342,6 +1342,10 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +void skb_flow_dissect_hash(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 4b6e36288ddd..cc10b10dc3a1 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -243,6 +243,14 @@ struct flow_dissector_key_ct { u32 ct_labels[4]; }; +/** + * struct flow_dissector_key_hash: + * @hash: hash value + */ +struct flow_dissector_key_hash { + u32 hash; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -271,6 +279,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ + FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.2.3 From 5923b8f7fa218a9bccd730c0a9692635eb2fc740 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 23 Jul 2020 01:03:01 +0300 Subject: net/sched: cls_flower: Add hash info to flow classification Adding new cls flower keys for hash value and hash mask and dissect the hash info from the skb into the flow key towards flow classication. Signed-off-by: Ariel Levkovich Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7576209d96f9..ee95f42fb0ec 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -578,6 +578,9 @@ enum { TCA_FLOWER_KEY_MPLS_OPTS, + TCA_FLOWER_KEY_HASH, /* u32 */ + TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From ba423fdaa589d972473083defedf9e862626d268 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:46 +0200 Subject: net: add a new sockptr_t type Add a uptr_t type that can hold a pointer to either a user or kernel memory region, and simply helpers to copy to and from it. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/sockptr.h (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h new file mode 100644 index 000000000000..700856e13ea0 --- /dev/null +++ b/include/linux/sockptr.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 Christoph Hellwig. + * + * Support for "universal" pointers that can point to either kernel or userspace + * memory. + */ +#ifndef _LINUX_SOCKPTR_H +#define _LINUX_SOCKPTR_H + +#include +#include + +typedef struct { + union { + void *kernel; + void __user *user; + }; + bool is_kernel : 1; +} sockptr_t; + +static inline bool sockptr_is_kernel(sockptr_t sockptr) +{ + return sockptr.is_kernel; +} + +static inline sockptr_t KERNEL_SOCKPTR(void *p) +{ + return (sockptr_t) { .kernel = p, .is_kernel = true }; +} + +static inline sockptr_t USER_SOCKPTR(void __user *p) +{ + return (sockptr_t) { .user = p }; +} + +static inline bool sockptr_is_null(sockptr_t sockptr) +{ + return !sockptr.user && !sockptr.kernel; +} + +static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) +{ + if (!sockptr_is_kernel(src)) + return copy_from_user(dst, src.user, size); + memcpy(dst, src.kernel, size); + return 0; +} + +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) +{ + if (!sockptr_is_kernel(dst)) + return copy_to_user(dst.user, src, size); + memcpy(dst.kernel, src, size); + return 0; +} + +static inline void *memdup_sockptr(sockptr_t src, size_t len) +{ + void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); + + if (!p) + return ERR_PTR(-ENOMEM); + if (copy_from_sockptr(p, src, len)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + return p; +} + +static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) +{ + char *p = kmalloc_track_caller(len + 1, GFP_KERNEL); + + if (!p) + return ERR_PTR(-ENOMEM); + if (copy_from_sockptr(p, src, len)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + p[len] = '\0'; + return p; +} + +static inline void sockptr_advance(sockptr_t sockptr, size_t len) +{ + if (sockptr_is_kernel(sockptr)) + sockptr.kernel += len; + else + sockptr.user += len; +} + +static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) +{ + if (sockptr_is_kernel(src)) { + size_t len = min(strnlen(src.kernel, count - 1) + 1, count); + + memcpy(dst, src.kernel, len); + return len; + } + return strncpy_from_user(dst, src.user, count); +} + +#endif /* _LINUX_SOCKPTR_H */ -- cgit v1.2.3 From b1ea9ff6aff2deae84eccaf0a07cd14912669680 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:47 +0200 Subject: net: switch copy_bpf_fprog_from_user to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1c6b6d982bf4..d07a6e973a7d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1276,7 +1277,7 @@ struct bpf_sockopt_kern { s32 retval; }; -int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len); +int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); struct bpf_sk_lookup_kern { u16 family; -- cgit v1.2.3 From c8c1bbb6eb498109286739f8b6090e99313dd104 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:50 +0200 Subject: net: switch sock_set_timeout to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Acked-by: Matthieu Baerts Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 62e18fc8ac9f..bfb2fe2fc368 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -1669,7 +1670,7 @@ void sock_pfree(struct sk_buff *skb); #endif int sock_setsockopt(struct socket *sock, int level, int op, - char __user *optval, unsigned int optlen); + sockptr_t optval, unsigned int optlen); int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); -- cgit v1.2.3 From c6d1b26a8fd4940fe5d4199311838f6c2aef0174 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:51 +0200 Subject: net/xfrm: switch xfrm_user_policy to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/xfrm.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f9e1fda82ddf..5e81868b574a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1609,10 +1610,11 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); -int xfrm_user_policy(struct sock *sk, int optname, - u8 __user *optval, int optlen); +int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, + int optlen); #else -static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) +static inline int xfrm_user_policy(struct sock *sk, int optname, + sockptr_t optval, int optlen) { return -ENOPROTOOPT; } -- cgit v1.2.3 From ab214d1bf8c7ef1ed7af803a72491cb29edfa8f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:53 +0200 Subject: netfilter: switch xt_copy_counters to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b8b943ee7b8b..5deb099d156d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -301,8 +301,8 @@ int xt_target_to_user(const struct xt_entry_target *t, int xt_data_to_user(void __user *dst, const void *src, int usersize, int size, int aligned_size); -void *xt_copy_counters_from_user(const void __user *user, unsigned int len, - struct xt_counters_info *info); +void *xt_copy_counters(sockptr_t arg, unsigned int len, + struct xt_counters_info *info); struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, -- cgit v1.2.3 From c2f12630c60ff33a9cafd221646053fc10ec59b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:54 +0200 Subject: netfilter: switch nf_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/netfilter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 711b4d4486f0..0101747de549 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -13,6 +13,7 @@ #include #include #include +#include #include static inline int NF_DROP_GETERR(int verdict) @@ -163,7 +164,8 @@ struct nf_sockopt_ops { /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; int set_optmax; - int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len); + int (*set)(struct sock *sk, int optval, sockptr_t arg, + unsigned int len); int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); @@ -338,7 +340,7 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, } /* Call setsockopt() */ -int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -- cgit v1.2.3 From b03afaa82ece13b2a008f0e3a7127bead578e3e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:55 +0200 Subject: bpfilter: switch bpfilter_ip_set_sockopt to sockptr_t This is mostly to prepare for cleaning up the callers, as bpfilter by design can't handle kernel pointers. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/bpfilter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index 9b114c718a76..2ae3c8e1d83c 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -4,9 +4,10 @@ #include #include +#include struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); @@ -16,8 +17,7 @@ struct bpfilter_umh_ops { struct umd_info info; /* since ip_getsockopt() can run in parallel, serialize access to umh */ struct mutex lock; - int (*sockopt)(struct sock *sk, int optname, - char __user *optval, + int (*sockopt)(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen, bool is_set); int (*start)(void); }; -- cgit v1.2.3 From 01ccb5b48f08f59ca3746d59221f4990aec1b194 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:56 +0200 Subject: net/ipv4: switch ip_mroute_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/mroute.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 9a36fad9e068..6cbbfe94348c 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) @@ -15,7 +16,7 @@ static inline int ip_mroute_opt(int opt) return opt >= MRT_BASE && opt <= MRT_MAX; } -int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); +int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); @@ -23,7 +24,7 @@ int ip_mr_init(void); bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { return -ENOPROTOOPT; } -- cgit v1.2.3 From de40a3e88311b6f0fc79b876a4768bf2d99f9aae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:57 +0200 Subject: net/ipv4: merge ip_options_get and ip_options_get_from_user Use the sockptr_t type to merge the versions. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/ip.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 3d34acc95ca8..d66ad3a95220 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -707,9 +708,7 @@ int __ip_options_compile(struct net *net, struct ip_options *opt, int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, - unsigned char *data, int optlen); -int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, - unsigned char __user *data, int optlen); + sockptr_t data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); -- cgit v1.2.3 From b43c6153132c92745317f92174af84f57b160b76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:59 +0200 Subject: net/ipv6: switch ip6_mroute_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/mroute6.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index c4a45859f586..bc351a85ce9b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #ifdef CONFIG_IPV6_MROUTE @@ -25,7 +26,7 @@ static inline int ip6_mroute_opt(int opt) struct sock; #ifdef CONFIG_IPV6_MROUTE -extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); +extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); @@ -33,9 +34,8 @@ extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *ar extern int ip6_mr_init(void); extern void ip6_mr_cleanup(void); #else -static inline -int ip6_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, unsigned int optlen) +static inline int ip6_mroute_setsockopt(struct sock *sock, int optname, + sockptr_t optval, unsigned int optlen) { return -ENOPROTOOPT; } -- cgit v1.2.3 From 86298285c9ae3a41ce21c2d00ebdde51dd2abc73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:01 +0200 Subject: net/ipv6: switch ipv6_flowlabel_opt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Note that the get case is pretty weird in that it actually copies data back to userspace from setsockopt. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 262fc88dbd7e..4c9d89b5d732 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -406,7 +406,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, struct ipv6_txoptions *fopt); void fl6_free_socklist(struct sock *sk); -int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen); +int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen); int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); -- cgit v1.2.3 From 91ac1ccaff597d06b1e16801e1a4c99b8a78dcbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:04 +0200 Subject: net/udp: switch udp_lib_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 17a9e86a8076..295d52a73598 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -306,7 +306,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int udp_lib_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen, + sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -- cgit v1.2.3 From d4c19c49142ddb2efcc34cff6379d03edb3553bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:05 +0200 Subject: net/tcp: switch ->md5_parse to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f7f7c0c1104..e3c8e1d82021 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2002,7 +2002,7 @@ struct tcp_sock_af_ops { const struct sk_buff *skb); int (*md5_parse)(struct sock *sk, int optname, - char __user *optval, + sockptr_t optval, int optlen); #endif }; -- cgit v1.2.3 From a7b75c5a8c41445f33efb663887ff5f5c3b4454b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:07 +0200 Subject: net: pass a sockptr_t into ->setsockopt Rework the remaining setsockopt code to pass a sockptr_t instead of a plain user pointer. This removes the last remaining set_fs(KERNEL_DS) outside of architecture specific code. Signed-off-by: Christoph Hellwig Acked-by: Stefan Schmidt [ieee802154] Acked-by: Matthieu Baerts Signed-off-by: David S. Miller --- include/linux/net.h | 4 +++- include/net/inet_connection_sock.h | 3 ++- include/net/ip.h | 2 +- include/net/ipv6.h | 4 ++-- include/net/sctp/structs.h | 2 +- include/net/sock.h | 4 ++-- include/net/tcp.h | 4 ++-- 7 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 858ff1d98154..d48ff1180879 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -162,7 +163,8 @@ struct proto_ops { int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, unsigned int optlen); + int optname, sockptr_t optval, + unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 157c60cca0ca..1e209ce7d1bd 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,7 +46,7 @@ struct inet_connection_sock_af_ops { u16 net_frag_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); + sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); diff --git a/include/net/ip.h b/include/net/ip.h index d66ad3a95220..b09c48d862cc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -722,7 +722,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); -int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, +int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4c9d89b5d732..bd1f396cc9c7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1084,8 +1084,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, * socket options (ipv6_sockglue.c) */ -int ipv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 233bbf7df5d6..b33f1aefad09 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -431,7 +431,7 @@ struct sctp_af { int (*setsockopt) (struct sock *sk, int level, int optname, - char __user *optval, + sockptr_t optval, unsigned int optlen); int (*getsockopt) (struct sock *sk, int level, diff --git a/include/net/sock.h b/include/net/sock.h index bfb2fe2fc368..2cc3ba667908 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1141,7 +1141,7 @@ struct proto { void (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); int (*setsockopt)(struct sock *sk, int level, - int optname, char __user *optval, + int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, @@ -1734,7 +1734,7 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen); + sockptr_t optval, unsigned int optlen); void sk_common_release(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index e3c8e1d82021..e0c35d56091f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -399,8 +399,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int tcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, -- cgit v1.2.3 From 6d04fe15f78acdf8e32329e208552e226f7a8ae6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:08 +0200 Subject: net: optimize the sockptr_t for unified kernel/user address spaces For architectures like x86 and arm64 we don't need the separate bit to indicate that a pointer is a kernel pointer as the address spaces are unified. That way the sockptr_t can be reduced to a union of two pointers, which leads to nicer calling conventions. The only caveat is that we need to check that users don't pass in kernel address and thus gain access to kernel memory. Thus the USER_SOCKPTR helper is replaced with a init_user_sockptr function that does this check and returns an error if it fails. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 700856e13ea0..7d5cdb2b30b5 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,9 +8,34 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H +#include #include #include +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE +typedef union { + void *kernel; + void __user *user; +} sockptr_t; + +static inline bool sockptr_is_kernel(sockptr_t sockptr) +{ + return (unsigned long)sockptr.kernel >= TASK_SIZE; +} + +static inline sockptr_t KERNEL_SOCKPTR(void *p) +{ + return (sockptr_t) { .kernel = p }; +} + +static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p) +{ + if ((unsigned long)p >= TASK_SIZE) + return -EFAULT; + sp->user = p; + return 0; +} +#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -29,10 +54,13 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) return (sockptr_t) { .kernel = p, .is_kernel = true }; } -static inline sockptr_t USER_SOCKPTR(void __user *p) +static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p) { - return (sockptr_t) { .user = p }; + sp->user = p; + sp->is_kernel = false; + return 0; } +#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ static inline bool sockptr_is_null(sockptr_t sockptr) { -- cgit v1.2.3 From 178c49d9f9a4b5ade00c93480d714708fe971e24 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:09 -0400 Subject: icmp: prepare rfc 4884 for ipv6 The RFC 4884 spec is largely the same between IPv4 and IPv6. Factor out the IPv4 specific parts in preparation for IPv6 support: - icmp types supported - icmp header size, and thus offset to original datagram start - datagram length field offset in icmp(6)hdr. - datagram length field word size: 4B for IPv4, 8B for IPv6. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/icmp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/icmp.h b/include/linux/icmp.h index 8fc38a34cb20..0af4d210ee31 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -37,6 +37,7 @@ static inline bool icmp_is_err(int type) } void ip_icmp_error_rfc4884(const struct sk_buff *skb, - struct sock_ee_data_rfc4884 *out); + struct sock_ee_data_rfc4884 *out, + int thlen, int off); #endif /* _LINUX_ICMP_H */ -- cgit v1.2.3 From 01370434df85eb76ecb1527a4466013c4aca2436 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:10 -0400 Subject: icmp6: support rfc 4884 Extend the rfc 4884 read interface introduced for ipv4 in commit eba75c587e81 ("icmp: support rfc 4884") to ipv6. Add socket option SOL_IPV6/IPV6_RECVERR_RFC4884. Changes v1->v2: - make ipv6_icmp_error_rfc4884 static (file scope) Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/icmpv6.h | 1 + include/uapi/linux/in6.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8d8f877e7f81..a44789d027cc 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -283,6 +283,7 @@ struct ipv6_pinfo { autoflowlabel:1, autoflowlabel_set:1, mc_all:1, + recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 2622b5a3e616..c1661febc2dc 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -68,6 +68,7 @@ struct icmp6hdr { #define icmp6_mtu icmp6_dataun.un_data32[0] #define icmp6_unused icmp6_dataun.un_data32[0] #define icmp6_maxdelay icmp6_dataun.un_data16[0] +#define icmp6_datagram_len icmp6_dataun.un_data8[0] #define icmp6_router icmp6_dataun.u_nd_advt.router #define icmp6_solicited icmp6_dataun.u_nd_advt.solicited #define icmp6_override icmp6_dataun.u_nd_advt.override diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 9f2273a08356..5ad396a57eb3 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req { #define IPV6_LEAVE_ANYCAST 28 #define IPV6_MULTICAST_ALL 29 #define IPV6_ROUTER_ALERT_ISOLATE 30 +#define IPV6_RECVERR_RFC4884 31 /* IPV6_MTU_DISCOVER values */ #define IPV6_PMTUDISC_DONT 0 -- cgit v1.2.3 From a228a64fc1e4428e2b96dc68e9ad3c447095c9e7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Jul 2020 18:10:18 -0700 Subject: bpf: Add bpf_prog iterator It's mostly a copy paste of commit 6086d29def80 ("bpf: Add bpf_map iterator") that is use to implement bpf_seq_file opreations to traverse all bpf programs. v1->v2: Tweak to use build time btf_id Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Daniel Borkmann --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bae557ff2da8..72221aea1c60 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1117,6 +1117,7 @@ int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); struct bpf_map *bpf_map_get_curr_or_next(u32 *id); +struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From 14fc6bd6b79c430f615500d0fe6cea4722110db8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:09 -0700 Subject: bpf: Refactor bpf_iter_reg to have separate seq_info member There is no functionality change for this patch. Struct bpf_iter_reg is used to register a bpf_iter target, which includes information for both prog_load, link_create and seq_file creation. This patch puts fields related seq_file creation into a different structure. This will be useful for map elements iterator where one iterator covers different map types and different map types may have different seq_ops, init/fini private_data function and private_data size. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184109.590030-1-yhs@fb.com --- include/linux/bpf.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 72221aea1c60..127067f71fd4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,15 @@ struct seq_operations; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); +typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +struct bpf_iter_seq_info { + const struct seq_operations *seq_ops; + bpf_iter_init_seq_priv_t init_seq_private; + bpf_iter_fini_seq_priv_t fini_seq_private; + u32 seq_priv_size; +}; + /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ @@ -1189,18 +1198,12 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } -typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); -typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); - #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - const struct seq_operations *seq_ops; - bpf_iter_init_seq_priv_t init_seq_private; - bpf_iter_fini_seq_priv_t fini_seq_private; - u32 seq_priv_size; u32 ctx_arg_info_size; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; + const struct bpf_iter_seq_info *seq_info; }; struct bpf_iter_meta { -- cgit v1.2.3 From f9c792729581bd8b8473af163e8ab426c2c61d89 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:10 -0700 Subject: bpf: Refactor to provide aux info to bpf_iter_init_seq_priv_t This patch refactored target bpf_iter_init_seq_priv_t callback function to accept additional information. This will be needed in later patches for map element targets since a particular map should be passed to traverse elements for that particular map. In the future, other information may be passed to target as well, e.g., pid, cgroup id, etc. to customize the iterator. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184110.590156-1-yhs@fb.com --- include/linux/bpf.h | 7 ++++++- include/linux/proc_fs.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 127067f71fd4..ef52717336cf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -33,11 +33,13 @@ struct btf; struct btf_type; struct exception_table_entry; struct seq_operations; +struct bpf_iter_aux_info; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; -typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, + struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); struct bpf_iter_seq_info { const struct seq_operations *seq_ops; @@ -1198,6 +1200,9 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +struct bpf_iter_aux_info { +}; + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d1eed1b43651..2df965cd0974 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); -extern int bpf_iter_init_seq_net(void *priv_data); +struct bpf_iter_aux_info; +extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux); extern void bpf_iter_fini_seq_net(void *priv_data); #ifdef CONFIG_PROC_PID_ARCH_STATUS -- cgit v1.2.3 From afbf21dce668ef59482037596eaffbe5041e094c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:11 -0700 Subject: bpf: Support readonly/readwrite buffers in verifier Readonly and readwrite buffer register states are introduced. Totally four states, PTR_TO_RDONLY_BUF[_OR_NULL] and PTR_TO_RDWR_BUF[_OR_NULL] are supported. As suggested by their respective names, PTR_TO_RDONLY_BUF[_OR_NULL] are for readonly buffers and PTR_TO_RDWR_BUF[_OR_NULL] for read/write buffers. These new register states will be used by later bpf map element iterator. New register states share some similarity to PTR_TO_TP_BUFFER as it will calculate accessed buffer size during verification time. The accessed buffer size will be later compared to other metrics during later attach/link_create time. Similar to reg_state PTR_TO_BTF_ID_OR_NULL in bpf iterator programs, PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL reg_types can be set at prog->aux->bpf_ctx_arg_aux, and bpf verifier will retrieve the values during btf_ctx_access(). Later bpf map element iterator implementation will show how such information will be assigned during target registeration time. The verifier is also enhanced such that PTR_TO_RDONLY_BUF can be passed to ARG_PTR_TO_MEM[_OR_NULL] helper argument, and PTR_TO_RDWR_BUF can be passed to ARG_PTR_TO_MEM[_OR_NULL] or ARG_PTR_TO_UNINIT_MEM. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184111.590274-1-yhs@fb.com --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ef52717336cf..f9c4bb08f616 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -353,6 +353,10 @@ enum bpf_reg_type { PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ + PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ + PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ + PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ + PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -694,6 +698,8 @@ struct bpf_prog_aux { u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; + u32 max_rdonly_access; + u32 max_rdwr_access; const struct bpf_ctx_arg_aux *ctx_arg_info; struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ -- cgit v1.2.3 From a5cbe05a6673b85bed2a63ffcfea6a96c6410cff Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:12 -0700 Subject: bpf: Implement bpf iterator for map elements The bpf iterator for map elements are implemented. The bpf program will receive four parameters: bpf_iter_meta *meta: the meta data bpf_map *map: the bpf_map whose elements are traversed void *key: the key of one element void *value: the value of the same element Here, meta and map pointers are always valid, and key has register type PTR_TO_RDONLY_BUF_OR_NULL and value has register type PTR_TO_RDWR_BUF_OR_NULL. The kernel will track the access range of key and value during verification time. Later, these values will be compared against the values in the actual map to ensure all accesses are within range. A new field iter_seq_info is added to bpf_map_ops which is used to add map type specific information, i.e., seq_ops, init/fini seq_file func and seq_file private data size. Subsequent patches will have actual implementation for bpf_map_ops->iter_seq_info. In user space, BPF_ITER_LINK_MAP_FD needs to be specified in prog attr->link_create.flags, which indicates that attr->link_create.target_fd is a map_fd. The reason for such an explicit flag is for possible future cases where one bpf iterator may allow more than one possible customization, e.g., pid and cgroup id for task_file. Current kernel internal implementation only allows the target to register at most one required bpf_iter_link_info. To support the above case, optional bpf_iter_link_info's are needed, the target can be extended to register such link infos, and user provided link_info needs to match one of target supported ones. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184112.590360-1-yhs@fb.com --- include/linux/bpf.h | 16 ++++++++++++++++ include/uapi/linux/bpf.h | 7 +++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9c4bb08f616..4175cf1f4665 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -107,6 +107,9 @@ struct bpf_map_ops { /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; + + /* bpf_iter info used to open a seq_file */ + const struct bpf_iter_seq_info *iter_seq_info; }; struct bpf_map_memory { @@ -1207,12 +1210,18 @@ int bpf_obj_get_user(const char __user *pathname, int flags); int __init bpf_iter_ ## target(args) { return 0; } struct bpf_iter_aux_info { + struct bpf_map *map; }; +typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux); + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; + bpf_iter_check_target_t check_target; u32 ctx_arg_info_size; + enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; @@ -1223,6 +1232,13 @@ struct bpf_iter_meta { u64 seq_num; }; +struct bpf_iter__bpf_map_elem { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(void *, key); + __bpf_md_ptr(void *, value); +}; + int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54d0c886e3ba..828c2f6438f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -246,6 +246,13 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. -- cgit v1.2.3 From 7b04d6d60fcfb5b2200ffebb9cfb90927bdfeec7 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 23 Jul 2020 11:06:44 -0700 Subject: bpf: Separate bpf_get_[stack|stackid] for perf events BPF Calling get_perf_callchain() on perf_events from PEBS entries may cause unwinder errors. To fix this issue, the callchain is fetched early. Such perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY. Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may also cause unwinder errors. To fix this, add separate version of these two helpers, bpf_get_[stack|stackid]_pe. These two hepers use callchain in bpf_perf_event_data_kern->data->callchain. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723180648.1429892-2-songliubraving@fb.com --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4175cf1f4665..8357be349133 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1675,6 +1675,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_stackid_proto_pe; +extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; -- cgit v1.2.3 From 5d99cb2c86775b4780c02a339a9578bf9471ead9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 23 Jul 2020 11:06:45 -0700 Subject: bpf: Fail PERF_EVENT_IOC_SET_BPF when bpf_get_[stack|stackid] cannot work bpf_get_[stack|stackid] on perf_events with precise_ip uses callchain attached to perf_sample_data. If this callchain is not presented, do not allow attaching BPF program that calls bpf_get_[stack|stackid] to this event. In the error case, -EPROTO is returned so that libbpf can identify this error and print proper hint message. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723180648.1429892-3-songliubraving@fb.com --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index d07a6e973a7d..0a355b005bf4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -533,7 +533,8 @@ struct bpf_prog { is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? */ - enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */ + enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ + call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ -- cgit v1.2.3 From 7d9c3427894fe70d1347b4820476bf37736d2ff0 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 23 Jul 2020 23:47:43 -0500 Subject: bpf: Make cgroup storages shared between programs on the same cgroup This change comes in several parts: One, the restriction that the CGROUP_STORAGE map can only be used by one program is removed. This results in the removal of the field 'aux' in struct bpf_cgroup_storage_map, and removal of relevant code associated with the field, and removal of now-noop functions bpf_free_cgroup_storage and bpf_cgroup_storage_release. Second, we permit a key of type u64 as the key to the map. Providing such a key type indicates that the map should ignore attach type when comparing map keys. However, for simplicity newly linked storage will still have the attach type at link time in its key struct. cgroup_storage_check_btf is adapted to accept u64 as the type of the key. Third, because the storages are now shared, the storages cannot be unconditionally freed on program detach. There could be two ways to solve this issue: * A. Reference count the usage of the storages, and free when the last program is detached. * B. Free only when the storage is impossible to be referred to again, i.e. when either the cgroup_bpf it is attached to, or the map itself, is freed. Option A has the side effect that, when the user detach and reattach a program, whether the program gets a fresh storage depends on whether there is another program attached using that storage. This could trigger races if the user is multi-threaded, and since nondeterminism in data races is evil, go with option B. The both the map and the cgroup_bpf now tracks their associated storages, and the storage unlink and free are removed from cgroup_bpf_detach and added to cgroup_bpf_release and cgroup_storage_map_free. The latter also new holds the cgroup_mutex to prevent any races with the former. Fourth, on attach, we reuse the old storage if the key already exists in the map, via cgroup_storage_lookup. If the storage does not exist yet, we create a new one, and publish it at the last step in the attach process. This does not create a race condition because for the whole attach the cgroup_mutex is held. We keep track of an array of new storages that was allocated and if the process fails only the new storages would get freed. Signed-off-by: YiFei Zhu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/d5401c6106728a00890401190db40020a1f84ff1.1595565795.git.zhuyifei@google.com --- include/linux/bpf-cgroup.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 2c6f26670acc..64f367044e25 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -46,7 +46,8 @@ struct bpf_cgroup_storage { }; struct bpf_cgroup_storage_map *map; struct bpf_cgroup_storage_key key; - struct list_head list; + struct list_head list_map; + struct list_head list_cg; struct rb_node node; struct rcu_head rcu; }; @@ -78,6 +79,9 @@ struct cgroup_bpf { struct list_head progs[MAX_BPF_ATTACH_TYPE]; u32 flags[MAX_BPF_ATTACH_TYPE]; + /* list of cgroup shared storages */ + struct list_head storages; + /* temp storage for effective prog array used by prog_attach/detach */ struct bpf_prog_array *inactive; @@ -161,6 +165,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); } +struct bpf_cgroup_storage * +cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, + void *key, bool locked); struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, enum bpf_cgroup_storage_type stype); void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); @@ -169,7 +176,6 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, enum bpf_attach_type type); void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); -void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map); int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, @@ -383,8 +389,6 @@ static inline void bpf_cgroup_storage_set( struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } -static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, - struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } static inline void bpf_cgroup_storage_free( -- cgit v1.2.3 From 6cc7d1e8e9e06d45f9d1a39a5f465288d7cd8f9a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:54 -0700 Subject: bpf: Make bpf_link API available indepently of CONFIG_BPF_SYSCALL Similarly to bpf_prog, make bpf_link and related generic API available unconditionally to make it easier to have bpf_link support in various parts of the kernel. Stub out init/prime/settle/cleanup and inc/put APIs. Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-2-andriin@fb.com --- include/linux/bpf.h | 81 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8357be349133..40c5e206ecf2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -781,6 +781,32 @@ struct bpf_array_aux { struct work_struct work; }; +struct bpf_link { + atomic64_t refcnt; + u32 id; + enum bpf_link_type type; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); + void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); + int (*fill_link_info)(const struct bpf_link *link, + struct bpf_link_info *info); +}; + +struct bpf_link_primer { + struct bpf_link *link; + struct file *file; + int fd; + u32 id; +}; + struct bpf_struct_ops_value; struct btf_type; struct btf_member; @@ -1164,32 +1190,6 @@ static inline bool bpf_bypass_spec_v4(void) int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); -struct bpf_link { - atomic64_t refcnt; - u32 id; - enum bpf_link_type type; - const struct bpf_link_ops *ops; - struct bpf_prog *prog; - struct work_struct work; -}; - -struct bpf_link_primer { - struct bpf_link *link; - struct file *file; - int fd; - u32 id; -}; - -struct bpf_link_ops { - void (*release)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, - struct bpf_prog *old_prog); - void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); - int (*fill_link_info)(const struct bpf_link *link, - struct bpf_link_info *info); -}; - void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); @@ -1401,6 +1401,35 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, + struct bpf_prog *prog) +{ +} + +static inline int bpf_link_prime(struct bpf_link *link, + struct bpf_link_primer *primer) +{ + return -EOPNOTSUPP; +} + +static inline int bpf_link_settle(struct bpf_link_primer *primer) +{ + return -EOPNOTSUPP; +} + +static inline void bpf_link_cleanup(struct bpf_link_primer *primer) +{ +} + +static inline void bpf_link_inc(struct bpf_link *link) +{ +} + +static inline void bpf_link_put(struct bpf_link *link) +{ +} + static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; -- cgit v1.2.3 From 7f0a838254bdd9114b978ef2541a6ce330307e9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:55 -0700 Subject: bpf, xdp: Maintain info on attached XDP BPF programs in net_device Instead of delegating to drivers, maintain information about which BPF programs are attached in which XDP modes (generic/skb, driver, or hardware) locally in net_device. This effectively obsoletes XDP_QUERY_PROG command. Such re-organization simplifies existing code already. But it also allows to further add bpf_link-based XDP attachments without drivers having to know about any of this at all, which seems like a good setup. XDP_SETUP_PROG/XDP_SETUP_PROG_HW are just low-level commands to driver to install/uninstall active BPF program. All the higher-level concerns about prog/link interaction will be contained within generic driver-agnostic logic. All the XDP_QUERY_PROG calls to driver in dev_xdp_uninstall() were removed. It's not clear for me why dev_xdp_uninstall() were passing previous prog_flags when resetting installed programs. That seems unnecessary, plus most drivers don't populate prog_flags anyways. Having XDP_SETUP_PROG vs XDP_SETUP_PROG_HW should be enough of an indicator of what is required of driver to correctly reset active BPF program. dev_xdp_uninstall() is also generalized as an iteration over all three supported mode. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-3-andriin@fb.com --- include/linux/netdevice.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ac2cd3f49aba..cad44b40c776 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -889,6 +889,17 @@ struct netlink_ext_ack; struct xdp_umem; struct xdp_dev_bulk_queue; +enum bpf_xdp_mode { + XDP_MODE_SKB = 0, + XDP_MODE_DRV = 1, + XDP_MODE_HW = 2, + __MAX_XDP_MODE +}; + +struct bpf_xdp_entity { + struct bpf_prog *prog; +}; + struct netdev_bpf { enum bpf_netdev_command command; union { @@ -2142,6 +2153,9 @@ struct net_device { #endif const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + + /* protected by rtnl_lock */ + struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3817,8 +3831,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, - enum bpf_netdev_command cmd); +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int xdp_umem_query(struct net_device *dev, u16 queue_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From aa8d3a716b59db6c1ad6c68fb8aa05e31980da60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:57 -0700 Subject: bpf, xdp: Add bpf_link-based XDP attachment API Add bpf_link-based API (bpf_xdp_link) to attach BPF XDP program through BPF_LINK_CREATE command. bpf_xdp_link is mutually exclusive with direct BPF program attachment, previous BPF program should be detached prior to attempting to create a new bpf_xdp_link attachment (for a given XDP mode). Once BPF link is attached, it can't be replaced by other BPF program attachment or link attachment. It will be detached only when the last BPF link FD is closed. bpf_xdp_link will be auto-detached when net_device is shutdown, similarly to how other BPF links behave (cgroup, flow_dissector). At that point bpf_link will become defunct, but won't be destroyed until last FD is closed. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-5-andriin@fb.com --- include/linux/netdevice.h | 4 ++++ include/uapi/linux/bpf.h | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cad44b40c776..7d3c412fcfe5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -888,6 +888,7 @@ struct bpf_prog_offload_ops; struct netlink_ext_ack; struct xdp_umem; struct xdp_dev_bulk_queue; +struct bpf_xdp_link; enum bpf_xdp_mode { XDP_MODE_SKB = 0, @@ -898,6 +899,7 @@ enum bpf_xdp_mode { struct bpf_xdp_entity { struct bpf_prog *prog; + struct bpf_xdp_link *link; }; struct netdev_bpf { @@ -3831,7 +3833,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); + int xdp_umem_query(struct net_device *dev, u16 queue_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 828c2f6438f2..87823fb9c123 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,6 +230,7 @@ enum bpf_attach_type { BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, BPF_SK_LOOKUP, + BPF_XDP, __MAX_BPF_ATTACH_TYPE }; @@ -242,6 +243,7 @@ enum bpf_link_type { BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, + BPF_LINK_TYPE_XDP = 6, MAX_BPF_LINK_TYPE, }; @@ -614,7 +616,10 @@ union bpf_attr { struct { /* struct used by BPF_LINK_CREATE command */ __u32 prog_fd; /* eBPF program to attach */ - __u32 target_fd; /* object to attach to */ + union { + __u32 target_fd; /* object to attach to */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ } link_create; -- cgit v1.2.3 From c1931c9784ebb5787c0784c112fb8baa5e8455b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:59 -0700 Subject: bpf: Implement BPF XDP link-specific introspection APIs Implement XDP link-specific show_fdinfo and link_info to emit ifindex. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-7-andriin@fb.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 87823fb9c123..e1ba4ae6a916 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4069,6 +4069,9 @@ struct bpf_link_info { __u32 netns_ino; __u32 attach_type; } netns; + struct { + __u32 ifindex; + } xdp; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From e8407fdeb9a6866784e249881f6c786a0835faba Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:46:02 -0700 Subject: bpf, xdp: Remove XDP_QUERY_PROG and XDP_QUERY_PROG_HW XDP commands Now that BPF program/link management is centralized in generic net_device code, kernel code never queries program id from drivers, so XDP_QUERY_PROG/XDP_QUERY_PROG_HW commands are unnecessary. This patch removes all the implementations of those commands in kernel, along the xdp_attachment_query(). This patch was compile-tested on allyesconfig. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-10-andriin@fb.com --- include/linux/netdevice.h | 8 -------- include/net/xdp.h | 2 -- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7d3c412fcfe5..1046763cd0dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -876,8 +876,6 @@ enum bpf_netdev_command { */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, - XDP_QUERY_PROG, - XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, @@ -911,12 +909,6 @@ struct netdev_bpf { struct bpf_prog *prog; struct netlink_ext_ack *extack; }; - /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */ - struct { - u32 prog_id; - /* flags with which program was installed */ - u32 prog_flags; - }; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; diff --git a/include/net/xdp.h b/include/net/xdp.h index dbe9c60797e1..3814fb631d52 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -240,8 +240,6 @@ struct xdp_attachment_info { }; struct netdev_bpf; -int xdp_attachment_query(struct xdp_attachment_info *info, - struct netdev_bpf *bpf); bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, -- cgit v1.2.3 From 8f4c0e01789c18674acdf17cae3822b3dc3db715 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:16 -0400 Subject: hsr: enhance netlink socket interface to support PRP Parallel Redundancy Protocol (PRP) is another redundancy protocol introduced by IEC 63439 standard. It is similar to HSR in many aspects:- - Use a pair of Ethernet interfaces to created the PRP device - Use a 6 byte redundancy protocol part (RCT, Redundancy Check Trailer) similar to HSR Tag. - Has Link Redundancy Entity (LRE) that works with RCT to implement redundancy. Key difference is that the protocol unit is a trailer instead of a prefix as in HSR. That makes it inter-operable with tradition network components such as bridges/switches which treat it as pad bytes, whereas HSR nodes requires some kind of translators (Called redbox) to talk to regular network devices. This features allows regular linux box to be converted to a DAN-P box. DAN-P stands for Dual Attached Node - PRP similar to DAN-H (Dual Attached Node - HSR). Add a comment at the header/source code to explicitly state that the driver files also handles PRP protocol as well. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- include/uapi/linux/hsr_netlink.h | 2 +- include/uapi/linux/if_link.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/hsr_netlink.h b/include/uapi/linux/hsr_netlink.h index c218ef9c35dd..d540ea9bbef4 100644 --- a/include/uapi/linux/hsr_netlink.h +++ b/include/uapi/linux/hsr_netlink.h @@ -17,7 +17,7 @@ /* Generic Netlink HSR family definition */ -/* attributes */ +/* attributes for HSR or PRP node */ enum { HSR_A_UNSPEC, HSR_A_NODE_ADDR, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index af8f31987526..63af64646358 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -907,7 +907,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -917,6 +924,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; -- cgit v1.2.3 From cde1a8a992875a7479c4321b2a4a190c2e92ec2a Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Sun, 26 Jul 2020 23:12:28 +0200 Subject: Bluetooth: btusb: Fix and detect most of the Chinese Bluetooth controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason they tend to squat on the very first CSR/ Cambridge Silicon Radio VID/PID instead of paying fees. This is an extremely common problem; the issue goes as back as 2013 and these devices are only getting more popular, even rebranded by reputable vendors and sold by retailers everywhere. So, at this point in time there are hundreds of modern dongles reusing the ID of what originally was an early Bluetooth 1.1 controller. Linux is the only place where they don't work due to spotty checks in our detection code. It only covered a minimum subset. So what's the big idea? Take advantage of the fact that all CSR chips report the same internal version as both the LMP sub-version and HCI revision number. It always matches, couple that with the manufacturer code, that rarely lies, and we now have a good idea of who is who. Additionally, by compiling a list of user-reported HCI/lsusb dumps, and searching around for legit CSR dongles in similar product ranges we can find what CSR BlueCore firmware supported which Bluetooth versions. That way we can narrow down ranges of fakes for each of them. e.g. Real CSR dongles with LMP subversion 0x73 are old enough that support BT 1.1 only; so it's a dead giveaway when some third-party BT 4.0 dongle reuses it. So, to sum things up; there are multiple classes of fake controllers reusing the same 0A12:0001 VID/PID. This has been broken for a while. Known 'fake' bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 IC markings on 0x7558: FR3191AHAL 749H15143 (???) https://bugzilla.kernel.org/show_bug.cgi?id=60824 Fixes: 81cac64ba258ae (Deal with USB devices that are faking CSR vendor) Reported-by: Michał Wiśniewski Tested-by: Mike Johnson Tested-by: Ricardo Rodrigues Tested-by: M.Hanny Sabbagh Tested-by: Oussama BEN BRAHIM Tested-by: Ismael Ferreras Morezuelas Signed-off-by: Ismael Ferreras Morezuelas Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 ++ include/net/bluetooth/hci.h | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7ee8041af803..9125effbf448 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -41,6 +41,8 @@ #define BLUETOOTH_VER_1_1 1 #define BLUETOOTH_VER_1_2 2 #define BLUETOOTH_VER_2_0 3 +#define BLUETOOTH_VER_2_1 4 +#define BLUETOOTH_VER_4_0 6 /* Reserv for core and drivers use */ #define BT_SKB_RESERVE 8 diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1f18f71363e9..1317dfd8f962 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -227,6 +227,17 @@ enum { * supported. */ HCI_QUIRK_VALID_LE_STATES, + + /* When this quirk is set, then erroneous data reporting + * is ignored. This is mainly due to the fact that the HCI + * Read Default Erroneous Data Reporting command is advertised, + * but not supported; these controllers often reply with unknown + * command and tend to lock up randomly. Needing a hard reset. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, }; /* HCI device flags */ -- cgit v1.2.3 From d6945242f45d4745a8169fdab7afeb40f4e36f06 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 18 May 2020 11:13:52 +0300 Subject: net/mlx5: Hold pages RB tree per VF Per page request event, FW request to allocated or release pages for a single function. Driver maintains FW pages object per function, so there is no need to hold one global page data-base. Instead, have a page data-base per function, which will improve performance release flow in all cases, especially for "release all pages". As the range of function IDs is large and not sequential, use xarray to store a per function ID page data-base, where the function ID is the key. Upon first allocation of a page to a function ID, create the page data-base per function. This data-base will be released only at pagealloc mechanism cleanup. NIC: ConnectX-4 Lx CPU: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz Test case: 32 VFs, measure release pages on one VF as part of FLR Before: 0.021 Sec After: 0.014 Sec The improvement depends on amount of VFs and memory utilization by them. Time measurements above were taken from idle system. Signed-off-by: Eran Ben Elisha Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6a97ad601991..a0fcc4d13e93 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -541,7 +541,7 @@ struct mlx5_priv { /* pages stuff */ struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; - struct rb_root page_root; + struct xarray page_root_xa; int fw_pages; atomic_t reg_pages; struct list_head free_list; -- cgit v1.2.3 From e1613b5714ee6c186c9628e9958edf65e9d9cddd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Jul 2020 15:47:15 -0700 Subject: bpf: Fix bpf_ringbuf_output() signature to return long Due to bpf tree fix merge, bpf_ringbuf_output() signature ended up with int as a return type, while all other helpers got converted to returning long. So fix it in bpf-next now. Fixes: b0659d8a950d ("bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200727224715.652037-1-andriin@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e1ba4ae6a916..eb5e0c38eb2c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3241,7 +3241,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification -- cgit v1.2.3 From 035bfd051eae5b365368be915dfaf916aa501a52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:34 +0200 Subject: net: make sockptr_is_null strict aliasing safe While the kernel in general is not strict aliasing safe we can trivially do that in sockptr_is_null without affecting code generation, so always check the actually assigned union member. Reported-by: Jan Engelhardt Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 7d5cdb2b30b5..b13ea1422f93 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -64,7 +64,9 @@ static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p) static inline bool sockptr_is_null(sockptr_t sockptr) { - return !sockptr.user && !sockptr.kernel; + if (sockptr_is_kernel(sockptr)) + return !sockptr.kernel; + return !sockptr.user; } static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) -- cgit v1.2.3 From d3c48151512922dd35f1f393b30b9138e4441d14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:35 +0200 Subject: net: remove sockptr_advance sockptr_advance never properly worked. Replace it with _offset variants of copy_from_sockptr and copy_to_sockptr. Fixes: ba423fdaa589 ("net: add a new sockptr_t type") Reported-by: Jason A. Donenfeld Reported-by: Ido Schimmel Signed-off-by: Christoph Hellwig Acked-by: Jason A. Donenfeld Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/sockptr.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index b13ea1422f93..9e6c81d474cb 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -69,19 +69,26 @@ static inline bool sockptr_is_null(sockptr_t sockptr) return !sockptr.user; } -static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) +static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, + size_t offset, size_t size) { if (!sockptr_is_kernel(src)) - return copy_from_user(dst, src.user, size); - memcpy(dst, src.kernel, size); + return copy_from_user(dst, src.user + offset, size); + memcpy(dst, src.kernel + offset, size); return 0; } -static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) +static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) +{ + return copy_from_sockptr_offset(dst, src, 0, size); +} + +static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, + const void *src, size_t size) { if (!sockptr_is_kernel(dst)) - return copy_to_user(dst.user, src, size); - memcpy(dst.kernel, src, size); + return copy_to_user(dst.user + offset, src, size); + memcpy(dst.kernel + offset, src, size); return 0; } @@ -112,14 +119,6 @@ static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) return p; } -static inline void sockptr_advance(sockptr_t sockptr, size_t len) -{ - if (sockptr_is_kernel(sockptr)) - sockptr.kernel += len; - else - sockptr.user += len; -} - static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) { if (sockptr_is_kernel(src)) { -- cgit v1.2.3 From a31edb2059ed4e498f9aa8230c734b59d0ad797a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:36 +0200 Subject: net: improve the user pointer check in init_user_sockptr Make sure not just the pointer itself but the whole range lies in the user address space. For that pass the length and then use the access_ok helper to do the check. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 9e6c81d474cb..96840def9d69 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -27,14 +27,6 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p }; } - -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p) -{ - if ((unsigned long)p >= TASK_SIZE) - return -EFAULT; - sp->user = p; - return 0; -} #else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { @@ -53,14 +45,16 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } +#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p) +static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, + size_t size) { - sp->user = p; - sp->is_kernel = false; + if (!access_ok(p, size)) + return -EFAULT; + *sp = (sockptr_t) { .user = p }; return 0; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ static inline bool sockptr_is_null(sockptr_t sockptr) { -- cgit v1.2.3 From b8265621f4888af9494e1d685620871ec81bc33d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 23 Jul 2020 17:21:59 -0700 Subject: Add pldmfw library for PLDM firmware update The pldmfw library is used to implement common logic needed to flash devices based on firmware files using the format described by the PLDM for Firmware Update standard. This library consists of logic to parse the PLDM file format from a firmware file object, as well as common logic for sending the relevant PLDM header data to the device firmware. A simple ops table is provided so that device drivers can implement device specific hardware interactions while keeping the common logic to the pldmfw library. This library will be used by the Intel ice networking driver as part of implementing device flash update via devlink. The library aims to be vendor and device agnostic. For this reason, it has been placed in lib/pldmfw, in the hopes that other devices which use the PLDM firmware file format may benefit from it in the future. However, do note that not all features defined in the PLDM standard have been implemented. Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- include/linux/pldmfw.h | 165 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/linux/pldmfw.h (limited to 'include') diff --git a/include/linux/pldmfw.h b/include/linux/pldmfw.h new file mode 100644 index 000000000000..0fc831338226 --- /dev/null +++ b/include/linux/pldmfw.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2019, Intel Corporation. */ + +#ifndef _PLDMFW_H_ +#define _PLDMFW_H_ + +#include +#include + +#define PLDM_DEVICE_UPDATE_CONTINUE_AFTER_FAIL BIT(0) + +#define PLDM_STRING_TYPE_UNKNOWN 0 +#define PLDM_STRING_TYPE_ASCII 1 +#define PLDM_STRING_TYPE_UTF8 2 +#define PLDM_STRING_TYPE_UTF16 3 +#define PLDM_STRING_TYPE_UTF16LE 4 +#define PLDM_STRING_TYPE_UTF16BE 5 + +struct pldmfw_record { + struct list_head entry; + + /* List of descriptor TLVs */ + struct list_head descs; + + /* Component Set version string*/ + const u8 *version_string; + u8 version_type; + u8 version_len; + + /* Package Data length */ + u16 package_data_len; + + /* Bitfield of Device Update Flags */ + u32 device_update_flags; + + /* Package Data block */ + const u8 *package_data; + + /* Bitmap of components applicable to this record */ + unsigned long *component_bitmap; + u16 component_bitmap_len; +}; + +/* Standard descriptor TLV identifiers */ +#define PLDM_DESC_ID_PCI_VENDOR_ID 0x0000 +#define PLDM_DESC_ID_IANA_ENTERPRISE_ID 0x0001 +#define PLDM_DESC_ID_UUID 0x0002 +#define PLDM_DESC_ID_PNP_VENDOR_ID 0x0003 +#define PLDM_DESC_ID_ACPI_VENDOR_ID 0x0004 +#define PLDM_DESC_ID_PCI_DEVICE_ID 0x0100 +#define PLDM_DESC_ID_PCI_SUBVENDOR_ID 0x0101 +#define PLDM_DESC_ID_PCI_SUBDEV_ID 0x0102 +#define PLDM_DESC_ID_PCI_REVISION_ID 0x0103 +#define PLDM_DESC_ID_PNP_PRODUCT_ID 0x0104 +#define PLDM_DESC_ID_ACPI_PRODUCT_ID 0x0105 +#define PLDM_DESC_ID_VENDOR_DEFINED 0xFFFF + +struct pldmfw_desc_tlv { + struct list_head entry; + + const u8 *data; + u16 type; + u16 size; +}; + +#define PLDM_CLASSIFICATION_UNKNOWN 0x0000 +#define PLDM_CLASSIFICATION_OTHER 0x0001 +#define PLDM_CLASSIFICATION_DRIVER 0x0002 +#define PLDM_CLASSIFICATION_CONFIG_SW 0x0003 +#define PLDM_CLASSIFICATION_APP_SW 0x0004 +#define PLDM_CLASSIFICATION_INSTRUMENTATION 0x0005 +#define PLDM_CLASSIFICATION_BIOS 0x0006 +#define PLDM_CLASSIFICATION_DIAGNOSTIC_SW 0x0007 +#define PLDM_CLASSIFICATION_OS 0x0008 +#define PLDM_CLASSIFICATION_MIDDLEWARE 0x0009 +#define PLDM_CLASSIFICATION_FIRMWARE 0x000A +#define PLDM_CLASSIFICATION_CODE 0x000B +#define PLDM_CLASSIFICATION_SERVICE_PACK 0x000C +#define PLDM_CLASSIFICATION_SOFTWARE_BUNDLE 0x000D + +#define PLDM_ACTIVATION_METHOD_AUTO BIT(0) +#define PLDM_ACTIVATION_METHOD_SELF_CONTAINED BIT(1) +#define PLDM_ACTIVATION_METHOD_MEDIUM_SPECIFIC BIT(2) +#define PLDM_ACTIVATION_METHOD_REBOOT BIT(3) +#define PLDM_ACTIVATION_METHOD_DC_CYCLE BIT(4) +#define PLDM_ACTIVATION_METHOD_AC_CYCLE BIT(5) + +#define PLDMFW_COMPONENT_OPTION_FORCE_UPDATE BIT(0) +#define PLDMFW_COMPONENT_OPTION_USE_COMPARISON_STAMP BIT(1) + +struct pldmfw_component { + struct list_head entry; + + /* component identifier */ + u16 classification; + u16 identifier; + + u16 options; + u16 activation_method; + + u32 comparison_stamp; + + u32 component_size; + const u8 *component_data; + + /* Component version string */ + const u8 *version_string; + u8 version_type; + u8 version_len; + + /* component index */ + u8 index; + +}; + +/* Transfer flag used for sending components to the firmware */ +#define PLDM_TRANSFER_FLAG_START BIT(0) +#define PLDM_TRANSFER_FLAG_MIDDLE BIT(1) +#define PLDM_TRANSFER_FLAG_END BIT(2) + +struct pldmfw_ops; + +/* Main entry point to the PLDM firmware update engine. Device drivers + * should embed this in a private structure and use container_of to obtain + * a pointer to their own data, used to implement the device specific + * operations. + */ +struct pldmfw { + const struct pldmfw_ops *ops; + struct device *dev; +}; + +bool pldmfw_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record); + +/* Operations invoked by the generic PLDM firmware update engine. Used to + * implement device specific logic. + * + * @match_record: check if the device matches the given record. For + * convenience, a standard implementation is provided for PCI devices. + * + * @send_package_data: send the package data associated with the matching + * record to firmware. + * + * @send_component_table: send the component data associated with a given + * component to firmware. Called once for each applicable component. + * + * @flash_component: Flash the data for a given component to the device. + * Called once for each applicable component, after all component tables have + * been sent. + * + * @finalize_update: (optional) Finish the update. Called after all components + * have been flashed. + */ +struct pldmfw_ops { + bool (*match_record)(struct pldmfw *context, struct pldmfw_record *record); + int (*send_package_data)(struct pldmfw *context, const u8 *data, u16 length); + int (*send_component_table)(struct pldmfw *context, struct pldmfw_component *component, + u8 transfer_flag); + int (*flash_component)(struct pldmfw *context, struct pldmfw_component *component); + int (*finalize_update)(struct pldmfw *context); +}; + +int pldmfw_flash_image(struct pldmfw *context, const struct firmware *fw); + +#endif -- cgit v1.2.3 From b9aaec8f0be518096d1377082e0abe6a85e86ff3 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Sun, 26 Jul 2020 15:48:16 -0700 Subject: fib: use indirect call wrappers in the most common fib_rules_ops This avoids another inderect call per RX packet which save us around 20-40 ns. Changelog: v1 -> v2: - Move declaraions to fib_rules.h to remove warnings Reported-by: kernel test robot Signed-off-by: Brian Vazquez Signed-off-by: David S. Miller --- include/net/fib_rules.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index a259050f84af..4b10676c69d1 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -10,6 +10,7 @@ #include #include #include +#include struct fib_kuid_range { kuid_t start; @@ -203,4 +204,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); + +INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, + struct flowi *fl, int flags)); +INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule, + struct flowi *fl, int flags)); + +INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule, + struct flowi *flp, int flags, + struct fib_lookup_arg *arg)); +INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, + struct flowi *flp, int flags, + struct fib_lookup_arg *arg)); + +INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, + struct fib_lookup_arg *arg)); +INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, + struct fib_lookup_arg *arg)); #endif -- cgit v1.2.3 From 50935339c394adfb3d7253055e3bc10ee70264b0 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Sat, 25 Jul 2020 19:02:25 +0200 Subject: netfilter: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connmark.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index 1aa5c955ee1e..f01c19b83a2b 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -4,7 +4,7 @@ #include -/* Copyright (C) 2002,2004 MARA Systems AB +/* Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From 6540351e6f27ef718e3cf5b46349633f3ec57859 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 23 Jul 2020 18:08:56 +0530 Subject: Bluetooth: Translate additional address type correctly When using controller based address resolution, then the new address types 0x02 and 0x03 are used. These types need to be converted back into either public address or random address types. Signed-off-by: Marcel Holtmann Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1317dfd8f962..c36dccd6718e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2279,8 +2279,10 @@ struct hci_ev_le_conn_complete { #define LE_EXT_ADV_SCAN_RSP 0x0008 #define LE_EXT_ADV_LEGACY_PDU 0x0010 -#define ADDR_LE_DEV_PUBLIC 0x00 -#define ADDR_LE_DEV_RANDOM 0x01 +#define ADDR_LE_DEV_PUBLIC 0x00 +#define ADDR_LE_DEV_RANDOM 0x01 +#define ADDR_LE_DEV_PUBLIC_RESOLVED 0x02 +#define ADDR_LE_DEV_RANDOM_RESOLVED 0x03 #define HCI_EV_LE_ADVERTISING_REPORT 0x02 struct hci_ev_le_advertising_info { -- cgit v1.2.3 From e1d572357599d142df5764b39731b6eb55a22beb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 23 Jul 2020 18:08:57 +0530 Subject: Bluetooth: Configure controller address resolution if available When the LL Privacy support is available, then as part of enabling or disabling passive background scanning, it is required to set up the controller based address resolution as well. Since only passive background scanning is utilizing the whitelist, the address resolution is now bound to the whitelist and passive background scanning. All other resolution can be easily done by the host stack. Signed-off-by: Marcel Holtmann Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bee1b4778ccc..8caac20556b4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1359,6 +1359,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) +/* Use LL Privacy based address resolution if supported */ +#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) + /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40)) -- cgit v1.2.3 From b2cc23398e8166b38f8715026273503b081c2a7a Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:02 +0530 Subject: Bluetooth: Enable RPA Timeout Enable RPA timeout during bluetooth initialization. The RPA timeout value is used from hdev, which initialized from debug_fs Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c36dccd6718e..dd82cce77a7a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1648,6 +1648,8 @@ struct hci_rp_le_read_resolv_list_size { #define HCI_OP_LE_SET_ADDR_RESOLV_ENABLE 0x202d +#define HCI_OP_LE_SET_RPA_TIMEOUT 0x202e + #define HCI_OP_LE_READ_MAX_DATA_LEN 0x202f struct hci_rp_le_read_max_data_len { __u8 status; -- cgit v1.2.3 From cbbdfa6f331980c6786b4ca5df53c37b90df3246 Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:03 +0530 Subject: Bluetooth: Enable controller RPA resolution using Experimental feature This patch adds support to enable the use of RPA Address resolution using expermental feature mgmt command. Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index dd82cce77a7a..c8e67042a3b1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -318,6 +318,7 @@ enum { HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, HCI_LL_RPA_RESOLUTION, + HCI_ENABLE_LL_PRIVACY, HCI_CMD_PENDING, HCI_FORCE_NO_MITM, -- cgit v1.2.3 From df78a0c0b67de58934877aad61e0431a2bd0caf1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 1 Jun 2020 23:22:47 -0700 Subject: nl80211: S1G band and channel definitions Gives drivers the definitions needed to advertise support for S1G bands. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200602062247.23212-1-thomas@adapt-ip.com Link: https://lore.kernel.org/r/20200731055636.795173-1-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 17 +++++++++++++++++ include/uapi/linux/nl80211.h | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fc7e8807838d..ac6e58193426 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -417,6 +417,22 @@ struct ieee80211_edmg { enum ieee80211_edmg_bw_config bw_config; }; +/** + * struct ieee80211_sta_s1g_cap - STA's S1G capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11ah S1G capabilities for a STA. + * + * @s1g_supported: is STA an S1G STA + * @cap: S1G capabilities information + * @nss_mcs: Supported NSS MCS set + */ +struct ieee80211_sta_s1g_cap { + bool s1g; + u8 cap[10]; /* use S1G_CAPAB_ */ + u8 nss_mcs[5]; +}; + /** * struct ieee80211_supported_band - frequency band definition * @@ -448,6 +464,7 @@ struct ieee80211_supported_band { int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_edmg edmg_cap; u16 n_iftype_data; const struct ieee80211_sband_iftype_data *iftype_data; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4e6339ab1fce..ad183469f9af 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4437,6 +4437,11 @@ enum nl80211_key_mode { * attribute must be provided as well * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel + * @NL80211_CHAN_WIDTH_1: 1 MHz OFDM channel + * @NL80211_CHAN_WIDTH_2: 2 MHz OFDM channel + * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel + * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel + * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -4447,6 +4452,11 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_160, NL80211_CHAN_WIDTH_5, NL80211_CHAN_WIDTH_10, + NL80211_CHAN_WIDTH_1, + NL80211_CHAN_WIDTH_2, + NL80211_CHAN_WIDTH_4, + NL80211_CHAN_WIDTH_8, + NL80211_CHAN_WIDTH_16, }; /** @@ -4457,11 +4467,15 @@ enum nl80211_chan_width { * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide + * @NL80211_BSS_CHAN_WIDTH_1: control channel is 1 MHz wide + * @NL80211_BSS_CHAN_WIDTH_2: control channel is 2 MHz wide */ enum nl80211_bss_scan_width { NL80211_BSS_CHAN_WIDTH_20, NL80211_BSS_CHAN_WIDTH_10, NL80211_BSS_CHAN_WIDTH_5, + NL80211_BSS_CHAN_WIDTH_1, + NL80211_BSS_CHAN_WIDTH_2, }; /** @@ -4740,6 +4754,7 @@ enum nl80211_txrate_gi { * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) + * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace * since newer kernel versions may support more bands */ @@ -4748,6 +4763,7 @@ enum nl80211_band { NL80211_BAND_5GHZ, NL80211_BAND_60GHZ, NL80211_BAND_6GHZ, + NL80211_BAND_S1GHZ, NUM_NL80211_BANDS, }; -- cgit v1.2.3 From 987021726f9f41a1daf335c57cd7b6261109cdb2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:21 -0700 Subject: net/wireless: nl80211.h: drop duplicate words in comments Drop doubled words in several comments. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-1-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ad183469f9af..f47a7a8d0216 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -363,7 +363,7 @@ * @NL80211_CMD_SET_STATION: Set station attributes for station identified by * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the - * the interface identified by %NL80211_ATTR_IFINDEX. + * interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC * or, if no MAC address given, all stations, on the interface identified * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and @@ -383,7 +383,7 @@ * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by * %NL80211_ATTR_MAC. * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the - * the interface identified by %NL80211_ATTR_IFINDEX. + * interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. @@ -934,7 +934,7 @@ * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules. * * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the - * the new channel information (Channel Switch Announcement - CSA) + * new channel information (Channel Switch Announcement - CSA) * in the beacon for some time (as defined in the * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the * new channel. Userspace provides the new channel information (using @@ -1113,7 +1113,7 @@ * randomization may be enabled and configured by specifying the * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. - * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in + * A u64 cookie for further %NL80211_ATTR_COOKIE use is returned in * the netlink extended ack message. * * To cancel a measurement, close the socket that requested it. @@ -1511,7 +1511,7 @@ enum nl80211_commands { * rates as defined by IEEE 802.11 7.3.2.2 but without the length * restriction (at most %NL80211_MAX_SUPP_RATES). * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station - * to, or the AP interface the station was originally added to to. + * to, or the AP interface the station was originally added to. * @NL80211_ATTR_STA_INFO: information about a station, part of station info * given for %NL80211_CMD_GET_STATION, nested attribute containing * info as possible, see &enum nl80211_sta_info. @@ -2084,7 +2084,7 @@ enum nl80211_commands { * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels. * * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported - * supported operating classes. + * operating classes. * * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space * controls DFS operation in IBSS mode. If the flag is included in @@ -2395,7 +2395,7 @@ enum nl80211_commands { * nl80211_txq_stats) * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy. * The smaller of this and the memory limit is enforced. - * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the + * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory limit (in bytes) for the * TXQ queues for this phy. The smaller of this and the packet limit is * enforced. * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes @@ -5652,7 +5652,7 @@ enum nl80211_feature_flags { * enum nl80211_ext_feature_index - bit index of extended features. * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can - * can request to use RRM (see %NL80211_ATTR_USE_RRM) with + * request to use RRM (see %NL80211_ATTR_USE_RRM) with * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set * the ASSOC_REQ_USE_RRM flag in the association request even if * NL80211_FEATURE_QUIET is not advertized. @@ -6061,7 +6061,7 @@ enum nl80211_dfs_state { }; /** - * enum enum nl80211_protocol_features - nl80211 protocol features + * enum nl80211_protocol_features - nl80211 protocol features * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting * wiphy dumps (if requested by the application with the attribute * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the -- cgit v1.2.3 From 0f55c0c500f2bbfc5cc5590cdf6973b3f64dc195 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:22 -0700 Subject: net/wireless: wireless.h: drop duplicate word in comments Drop doubled word "threshold" in a comment. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-2-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index 24f3371ad826..08967b3f19c8 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -914,7 +914,7 @@ union iwreq_data { struct iw_param sens; /* signal level threshold */ struct iw_param bitrate; /* default bit rate */ struct iw_param txpower; /* default transmit power */ - struct iw_param rts; /* RTS threshold threshold */ + struct iw_param rts; /* RTS threshold */ struct iw_param frag; /* Fragmentation threshold */ __u32 mode; /* Operation mode */ struct iw_param retry; /* Retry limits & lifetime */ -- cgit v1.2.3 From 085a6c109b9dbcb6dfc0c7d1001f554a6d513342 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:23 -0700 Subject: net/wireless: cfg80211.h: drop duplicate words in comments Drop doubled word "by" in a comment. Change "operate in in" to "operate with in" as is used below. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-3-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ac6e58193426..5c7ea0b5dc50 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -439,7 +439,7 @@ struct ieee80211_sta_s1g_cap { * This structure describes a frequency band a wiphy * is able to operate in. * - * @channels: Array of channels the hardware can operate in + * @channels: Array of channels the hardware can operate with * in this band. * @band: the band this structure represents * @n_channels: Number of channels in @channels @@ -5527,7 +5527,7 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, * * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and - * initialized by by the caller. + * initialized by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. -- cgit v1.2.3 From 66b239d28c7524324d2474b3faf206d00eadcd78 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:24 -0700 Subject: net/wireless: mac80211.h: drop duplicate words in comments Drop doubled words "are" and "by" in comments. Change doubled "to to" to "to the". Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-4-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 11d5610d2ad5..c0a597167f14 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2727,7 +2727,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * for devices that support offload of data packets (e.g. ARP responses). * * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag - * when they are able to replace in-use PTK keys according to to following + * when they are able to replace in-use PTK keys according to the following * requirements: * 1) They do not hand over frames decrypted with the old key to mac80211 once the call to set_key() with command %DISABLE_KEY has been @@ -4709,7 +4709,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, * * Call this function for all transmitted data frames after their transmit * completion. This callback should only be called for data frames which - * are are using driver's (or hardware's) offload capability of encap/decap + * are using driver's (or hardware's) offload capability of encap/decap * 802.11 frames. * * This function may not be called in IRQ context. Calls to this function @@ -6344,7 +6344,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid); * * Note that this must be called in an rcu_read_lock() critical section, * which can only be released after the SKB was handled. Some pointers in - * skb->cb, e.g. the key pointer, are protected by by RCU and thus the + * skb->cb, e.g. the key pointer, are protected by RCU and thus the * critical section must persist not just for the duration of this call * but for the duration of the frame handling. * However, also note that while in the wake_tx_queue() method, -- cgit v1.2.3 From dec4ca931244632eeca46f406b2ce7f5a1fe723f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 09:43:25 -0700 Subject: net/wireless: regulatory.h: drop duplicate word in comment Drop doubled word "of" in a comment. Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Link: https://lore.kernel.org/r/20200715164325.9109-5-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 09a3099886e5..47f06f6f5a67 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -44,7 +44,7 @@ enum environment_cap { * and potentially inform users of which devices specifically * cased the conflicts. * @initiator: indicates who sent this request, could be any of - * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) + * those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested * regulatory domain. We have a few special codes: * 00 - World regulatory domain -- cgit v1.2.3 From 2f1805ea209a146669e0b660633ed22f49e1dd49 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 25 Jun 2020 14:15:24 +0300 Subject: cfg80211: allow the low level driver to flush the BSS table The low level driver adds its own opaque information in the BSS table in the cfg80211_bss structure. The low level driver may need to signal that this information is no longer relevant and needs to be recreated. Add an API to allow the low level driver to do that. iwlwifi needs this because it keeps there an information about the firmware's internal clock. This is kept in mac80211's struct ieee80211_bss::sync_device_ts. This information is populated while we scan, we add the internal firmware's clock to each beacon which allows us to program the firmware correctly after association so that it'll know when (in terms of its internal clock) the DTIM and TBTT will happen. When the firmware is reset this internal clock is reset as well and ieee80211_bss::sync_device_ts is no longer accurate. iwlwifi will call this new API any time the firmware is started. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20200625111524.3992-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5c7ea0b5dc50..fa4d5627397f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7899,4 +7899,10 @@ void cfg80211_update_owe_info_event(struct net_device *netdev, struct cfg80211_update_owe_info *owe_info, gfp_t gfp); +/** + * cfg80211_bss_flush - resets all the scan entries + * @wiphy: the wiphy + */ +void cfg80211_bss_flush(struct wiphy *wiphy); + #endif /* __NET_CFG80211_H */ -- cgit v1.2.3 From e3718a611470d311a92c60d4eb535270b49a7108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 17 Jun 2020 09:30:33 +0200 Subject: cfg80211/mac80211: add mesh_param "mesh_nolearn" to skip path discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, before being able to forward a packet between two 802.11s nodes, both a PLINK handshake is performed upon receiving a beacon and then later a PREQ/PREP exchange for path discovery is performed on demand upon receiving a data frame to forward. When running a mesh protocol on top of an 802.11s interface, like batman-adv, we do not need the multi-hop mesh routing capabilities of 802.11s and usually set mesh_fwding=0. However, even with mesh_fwding=0 the PREQ/PREP path discovery is still performed on demand. Even though in this scenario the next hop PREQ/PREP will determine is always the direct 11s neighbor node. The new mesh_nolearn parameter allows to skip the PREQ/PREP exchange in this scenario, leading to a reduced delay, reduced packet buffering and simplifies HWMP in general. mesh_nolearn is still rather conservative in that if the packet destination is not a direct 11s neighbor, it will fall back to PREQ/PREP path discovery. For normal, multi-hop 802.11s mesh routing it is usually not advisable to enable mesh_nolearn as a transmission to a direct but distant neighbor might be worse than reaching that same node via a more robust / higher throughput etc. multi-hop path. Cc: Sven Eckelmann Cc: Simon Wunderlich Signed-off-by: Linus Lüssing Link: https://lore.kernel.org/r/20200617073034.26149-1-linus.luessing@c0d3.blue [fix nl80211 policy to range 0/1 only] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa4d5627397f..78b220950942 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1870,6 +1870,11 @@ struct bss_parameters { * connected to a mesh gate in mesh formation info. If false, the * value in mesh formation is determined by the presence of root paths * in the mesh path table + * @dot11MeshNolearn: Try to avoid multi-hop path discovery (e.g. PREQ/PREP + * for HWMP) if the destination is a direct neighbor. Note that this might + * not be the optimal decision as a multi-hop route might be better. So + * if using this setting you will likely also want to disable + * dot11MeshForwarding and use another mesh routing protocol on top. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1901,6 +1906,7 @@ struct mesh_config { enum nl80211_mesh_power_mode power_mode; u16 dot11MeshAwakeWindowDuration; u32 plink_timeout; + bool dot11MeshNolearn; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f47a7a8d0216..a83d8faf88ac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4236,6 +4236,12 @@ enum nl80211_mesh_power_mode { * field. If left unset then the mesh formation field will only * advertise such if there is an active root mesh path. * + * @NL80211_MESHCONF_NOLEARN: Try to avoid multi-hop path discovery (e.g. + * PREQ/PREP for HWMP) if the destination is a direct neighbor. Note that + * this might not be the optimal decision as a multi-hop route might be + * better. So if using this setting you will likely also want to disable + * dot11MeshForwarding and use another mesh routing protocol on top. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -4269,6 +4275,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_AWAKE_WINDOW, NL80211_MESHCONF_PLINK_TIMEOUT, NL80211_MESHCONF_CONNECTED_TO_GATE, + NL80211_MESHCONF_NOLEARN, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 184eebe664f0e11c485f6d309fe56297b3f75e9e Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:37 +0200 Subject: cfg80211/mac80211: add connected to auth server to meshconf Besides information about num of peerings and gate connectivity, the mesh formation byte also contains a flag for authentication server connectivity, that currently cannot be set in the mesh conf. This patch adds this capability, which is necessary to implement 802.1X authentication in mesh mode. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 + include/uapi/linux/nl80211.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 78b220950942..8d5071f84ffe 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1895,6 +1895,7 @@ struct mesh_config { u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; bool dot11MeshConnectedToMeshGate; + bool dot11MeshConnectedToAuthServer; u16 dot11MeshHWMPRannInterval; bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a83d8faf88ac..f1770e3756f4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4242,6 +4242,10 @@ enum nl80211_mesh_power_mode { * better. So if using this setting you will likely also want to disable * dot11MeshForwarding and use another mesh routing protocol on top. * + * @NL80211_MESHCONF_CONNECTED_TO_AS: If set to true then this mesh STA + * will advertise that it is connected to a authentication server + * in the mesh formation field. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -4276,6 +4280,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_PLINK_TIMEOUT, NL80211_MESHCONF_CONNECTED_TO_GATE, NL80211_MESHCONF_NOLEARN, + NL80211_MESHCONF_CONNECTED_TO_AS, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 1303a51c24100b3b1915d6f9072fe5ae5bb4c5f6 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:38 +0200 Subject: cfg80211/mac80211: add connected to auth server to station info This patch adds the necessary bits to later query the auth server flag for every peer from iw. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-2-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8d5071f84ffe..39fe21edd2c5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1598,6 +1598,7 @@ struct cfg80211_tid_stats { * an FCS error. This counter should be incremented only when TA of the * received packet with an FCS error matches the peer MAC address. * @airtime_link_metric: mesh airtime link metric. + * @connected_to_as: true if mesh STA has a path to authentication server */ struct station_info { u64 filled; @@ -1655,6 +1656,8 @@ struct station_info { u32 fcs_err_count; u32 airtime_link_metric; + + u8 connected_to_as; }; #if IS_ENABLED(CONFIG_CFG80211) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1770e3756f4..d6b6599a6001 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3370,6 +3370,8 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds) * of STA's association + * @NL80211_STA_INFO_CONNECTED_TO_AS: set to true if STA has a path to a + * authentication server (u8, 0 or 1) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3417,6 +3419,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_AIRTIME_WEIGHT, NL80211_STA_INFO_AIRTIME_LINK_METRIC, NL80211_STA_INFO_ASSOC_AT_BOOTTIME, + NL80211_STA_INFO_CONNECTED_TO_AS, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 48a54f6bc456859dabbd1fbd805e233d260754cf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 26 Jul 2020 15:09:46 +0200 Subject: net/fq_impl: use skb_get_hash instead of skb_get_hash_perturb This avoids unnecessarily regenerating the skb flow hash Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200726130947.88145-1-nbd@nbd.name [small commit message fixup] Signed-off-by: Johannes Berg --- include/net/fq.h | 1 - include/net/fq_impl.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/fq.h b/include/net/fq.h index 2ad85e683041..e39f3f8d5f8a 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -69,7 +69,6 @@ struct fq { struct list_head backlogs; spinlock_t lock; u32 flows_cnt; - siphash_key_t perturbation; u32 limit; u32 memory_limit; u32 memory_usage; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 38a9a3d1222b..e73d74d2fabf 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -108,7 +108,7 @@ begin: static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) { - u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); + u32 hash = skb_get_hash(skb); return reciprocal_scale(hash, fq->flows_cnt); } @@ -308,7 +308,6 @@ static int fq_init(struct fq *fq, int flows_cnt) INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); - get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ -- cgit v1.2.3 From fd17dba1c860d39f655a3a08387c21e3ceca8c55 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 20 Jul 2020 13:12:25 +0530 Subject: cfg80211: Add support to advertize OCV support Add a new feature flag that drivers can use to advertize support for Operating Channel Validation (OCV) when using driver's SME for RSNA handshakes. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200720074225.8990-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d6b6599a6001..a3ae2b060a55 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5804,6 +5804,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS: The driver * can report tx status for control port over nl80211 tx operations. * + * @NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION: Driver supports Operating + * Channel Validation (OCV) when using driver's SME for RSNA handshakes. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5859,6 +5862,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, + NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From e02281e7a5c524aaf6a52bb01afc4cc49addb908 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:49 +0400 Subject: mac80211: add radiotap flag to prevent sequence number overwrite The radiotap specification contains a flag to indicate that the sequence number of an injected frame should not be overwritten. Parse this flag and define and set a corresponding Tx control flag. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-2-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 1 + include/net/mac80211.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 459d355f6506..19c00d100096 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -117,6 +117,7 @@ enum ieee80211_radiotap_tx_flags { IEEE80211_RADIOTAP_F_TX_CTS = 0x0002, IEEE80211_RADIOTAP_F_TX_RTS = 0x0004, IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008, + IEEE80211_RADIOTAP_F_TX_NOSEQNO = 0x0010, }; /* for IEEE80211_RADIOTAP_MCS "have" flags */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c0a597167f14..21ce821a25e7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -825,6 +825,8 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup * @IEEE80211_TX_CTRL_HW_80211_ENCAP: This frame uses hardware encapsulation * (header conversion) + * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that + * has already been assigned to this frame. * * These flags are used in tx_info->control.flags. */ @@ -836,6 +838,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), IEEE80211_TX_CTRL_HW_80211_ENCAP = BIT(6), + IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), }; /* -- cgit v1.2.3 From cb17ed29a7a5fea8c9bf70e8a05757d71650e025 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:53 +0400 Subject: mac80211: parse radiotap header when selecting Tx queue Already parse the radiotap header in ieee80211_monitor_select_queue. In a subsequent commit this will allow us to add a radiotap flag that influences the queue on which injected packets will be sent. This also fixes the incomplete validation of the injected frame in ieee80211_monitor_select_queue: currently an out of bounds memory access may occur in in the called function ieee80211_select_queue_80211 if the 802.11 header is too small. Note that in ieee80211_monitor_start_xmit the radiotap header is parsed again, which is necessairy because ieee80211_monitor_select_queue is not always called beforehand. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-6-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 21ce821a25e7..6e26f0ba6fd0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6238,6 +6238,14 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta); +/** + * Sanity-check and parse the radiotap header of injected frames + * @skb: packet injected by userspace + * @dev: the &struct device of this 802.11 device + */ +bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, + struct net_device *dev); + /** * struct ieee80211_noa_data - holds temporary data for tracking P2P NoA state * -- cgit v1.2.3 From c5d1686b314ea44c5f210990dee05caf98cb068f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 26 Jul 2020 13:06:11 +0200 Subject: mac80211: add a function for running rx without passing skbs to the stack This can be used to run mac80211 rx processing on a batch of frames in NAPI poll before passing them to the network stack in a large batch. This can improve icache footprint, or it can be used to pass frames via netif_receive_skb_list. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200726110611.46886-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6e26f0ba6fd0..66e2bfd165e8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4360,6 +4360,31 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** + * ieee80211_rx_list - receive frame and store processed skbs in a list + * + * Use this function to hand received frames to mac80211. The receive + * buffer in @skb must start with an IEEE 802.11 header. In case of a + * paged @skb is used, the driver is recommended to put the ieee80211 + * header of the frame on the linear part of the @skb to avoid memory + * allocation and/or memcpy by the stack. + * + * This function may not be called in IRQ context. Calls to this function + * for a single hardware must be synchronized against each other. Calls to + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be + * mixed for a single hardware. Must not run concurrently with + * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * + * This function must be called with BHs disabled and RCU read lock + * + * @hw: the hardware this frame came in on + * @sta: the station the frame was received from, or %NULL + * @skb: the buffer to receive, owned by mac80211 after this call + * @list: the destination list + */ +void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, + struct sk_buff *skb, struct list_head *list); + /** * ieee80211_rx_napi - receive frame from NAPI context * -- cgit v1.2.3 From 75e6b594bbaeeb3f8287a2e6eb8811384b8c7195 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Jul 2020 13:00:52 +0200 Subject: cfg80211: invert HE BSS color 'disabled' to 'enabled' This is in fact 'disabled' in the spec, but there it's in a place where that actually makes sense. In our internal data structures, it doesn't really make sense, and in fact the previous commit just fixed a bug in that area. Make this safer by inverting the polarity from 'disabled' to 'enabled'. Link: https://lore.kernel.org/r/20200730130051.5d8399545bd9.Ie62fdcd1a6cd9c969315bc124084a494ca6c8df3@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 39fe21edd2c5..d9e6b9fbd95b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -267,12 +267,12 @@ struct ieee80211_he_obss_pd { * struct cfg80211_he_bss_color - AP settings for BSS coloring * * @color: the current color. - * @disabled: is the feature disabled. + * @enabled: HE BSS color is used * @partial: define the AID equation. */ struct cfg80211_he_bss_color { u8 color; - bool disabled; + bool enabled; bool partial; }; -- cgit v1.2.3 From f96622749a67d40ad5efe8a58d5fc95313097aa0 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Tue, 23 Jun 2020 08:49:35 -0500 Subject: nl80211: support 4-way handshake offloading for WPA/WPA2-PSK in AP mode Let drivers advertise support for AP-mode WPA/WPA2-PSK 4-way handshake offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag. Extend use of NL80211_ATTR_PMK attribute indicating it might be passed as part of NL80211_CMD_START_AP command, and contain the PSK (which is the PMK, hence the name). The driver is assumed to handle the 4-way handshake by itself in this case, instead of relying on userspace. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Link: https://lore.kernel.org/r/20200623134938.39997-2-chi-hsien.lin@cypress.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a3ae2b060a55..631f3a997b3c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -183,18 +183,27 @@ * * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers * can indicate they support offloading EAPOL handshakes for WPA/WPA2 - * preshared key authentication. In %NL80211_CMD_CONNECT the preshared - * key should be specified using %NL80211_ATTR_PMK. Drivers supporting - * this offload may reject the %NL80211_CMD_CONNECT when no preshared - * key material is provided, for example when that driver does not - * support setting the temporal keys through %CMD_NEW_KEY. + * preshared key authentication in station mode. In %NL80211_CMD_CONNECT + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_CONNECT when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. * * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be * set by drivers indicating offload support of the PTK/GTK EAPOL - * handshakes during 802.1X authentication. In order to use the offload - * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS - * attribute flag. Drivers supporting this offload may reject the - * %NL80211_CMD_CONNECT when the attribute flag is not present. + * handshakes during 802.1X authentication in station mode. In order to + * use the offload the %NL80211_CMD_CONNECT should have + * %NL80211_ATTR_WANT_1X_4WAY_HS attribute flag. Drivers supporting this + * offload may reject the %NL80211_CMD_CONNECT when the attribute flag is + * not present. + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication in AP mode. In %NL80211_CMD_START_AP + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_START_AP when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. * * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK * using %NL80211_CMD_SET_PMK. For offloaded FT support also @@ -2362,10 +2371,11 @@ enum nl80211_commands { * * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. - * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way - * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is - * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute - * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well. + * For %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP it is used to provide + * PSK for offloading 4-way handshake for WPA/WPA2-PSK networks. For 802.1X + * authentication it is used with %NL80211_CMD_SET_PMK. For offloaded FT + * support this attribute specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME + * is included as well. * * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to * indicate that it supports multiple active scheduled scan requests. @@ -5807,6 +5817,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION: Driver supports Operating * Channel Validation (OCV) when using driver's SME for RSNA handshakes. * + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK: Device wants to do 4-way + * handshake with PSK in AP mode (PSK is passed as part of the start AP + * command). + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5863,6 +5877,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From ffe8923f109b7ea92c0842c89e61300eefa11c94 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Jul 2020 13:34:46 +0200 Subject: netfilter: nft_compat: make sure xtables destructors have run Pablo Neira found that after recent update of xt_IDLETIMER the iptables-nft tests sometimes show an error. He tracked this down to the delayed cleanup used by nf_tables core: del rule (transaction A) add rule (transaction B) Its possible that by time transaction B (both in same netns) runs, the xt target destructor has not been invoked yet. For native nft expressions this is no problem because all expressions that have such side effects make sure these are handled from the commit phase, rather than async cleanup. For nft_compat however this isn't true. Instead of forcing synchronous behaviour for nft_compat, keep track of the number of outstanding destructor calls. When we attempt to create a new expression, flush the cleanup worker to make sure destructors have completed. With lots of help from Pablo Neira. Reported-by: Pablo Neira Ayso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6f0f6fca9ac3..2571c09be8bb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1498,4 +1498,6 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); + +void nf_tables_trans_destroy_flush_work(void); #endif /* _NET_NF_TABLES_H */ -- cgit v1.2.3 From f8ace8d915b88bd1bbaac695de94650dbb25c7b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:50 +0200 Subject: tcp: rename request_sock cookie_ts bit to syncookie Nowadays output function has a 'synack_type' argument that tells us when the syn/ack is emitted via syncookies. The request already tells us when timestamps are supported, so check both to detect special timestamp for tcp option encoding is needed. We could remove cookie_ts altogether, but a followup patch would otherwise need to adjust function signatures to pass 'want_cookie' to mptcp core. This way, the 'existing' bit can be used. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cf8b33213bbc..b2eb8b4ba697 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -54,7 +54,7 @@ struct request_sock { struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ - u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ + u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */ u8 num_timeout:7; /* number of timeouts */ u32 ts_recent; struct timer_list rsk_timer; -- cgit v1.2.3 From 08b8d080982fec354173d3fd28a3106a719b8950 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:53 +0200 Subject: mptcp: rename and export mptcp_subflow_request_sock_ops syncookie code path needs to create an mptcp request sock. Prepare for this and add mptcp prefix plus needed export of ops struct. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 02158c257bd4..76eb915bf91c 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -58,6 +58,7 @@ struct mptcp_out_options { }; #ifdef CONFIG_MPTCP +extern struct request_sock_ops mptcp_subflow_request_sock_ops; void mptcp_init(void); -- cgit v1.2.3 From c83a47e50d8fd3825a4758158e9edd5acdc74185 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:54 +0200 Subject: mptcp: subflow: add mptcp_subflow_init_cookie_req helper Will be used to initialize the mptcp request socket when a MP_CAPABLE request was handled in syncookie mode, i.e. when a TCP ACK containing a MP_CAPABLE option is a valid syncookie value. Normally (non-cookie case), MPTCP will generate a unique 32 bit connection ID and stores it in the MPTCP token storage to be able to retrieve the mptcp socket for subflow joining. In syncookie case, we do not want to store any state, so just generate the unique ID and use it in the reply. This means there is a small window where another connection could generate the same token. When Cookie ACK comes back, we check that the token has not been registered in the mean time. If it was, the connection needs to fall back to TCP. Changes in v2: - use req->syncookie instead of passing 'want_cookie' arg to ->init_req() (Eric Dumazet) Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 76eb915bf91c..3525d2822abe 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -131,6 +131,9 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, } void mptcp_seq_show(struct seq_file *seq); +int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb); #else static inline void mptcp_init(void) @@ -200,6 +203,13 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { } static inline void mptcp_seq_show(struct seq_file *seq) { } + +static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + return 0; /* TCP fallback */ +} #endif /* CONFIG_MPTCP */ #if IS_ENABLED(CONFIG_MPTCP_IPV6) -- cgit v1.2.3 From 6fc8c827dd4fa615965c4eac9bbfd465f6eb8fb4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:55 +0200 Subject: tcp: syncookies: create mptcp request socket for ACK cookies with MPTCP option If SYN packet contains MP_CAPABLE option, keep it enabled. Syncokie validation and cookie-based socket creation is changed to instantiate an mptcp request sockets if the ACK contains an MPTCP connection request. Rather than extend both cookie_v4/6_check, add a common helper to create the (mp)tcp request socket. Suggested-by: Paolo Abeni Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e0c35d56091f..dbf5c791a6eb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,8 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); +struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. -- cgit v1.2.3 From 48040793fa6003d211f021c6ad273477bcd90d91 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 30 Jul 2020 15:44:40 -0700 Subject: tcp: add earliest departure time to SCM_TIMESTAMPING_OPT_STATS This change adds TCP_NLA_EDT to SCM_TIMESTAMPING_OPT_STATS that reports the earliest departure time(EDT) of the timestamped skb. By tracking EDT values of the skb from different timestamps, we can observe when and how much the value changed. This allows to measure the precise delay injected on the sender host e.g. by a bpf-base throttler. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/uapi/linux/tcp.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 527d668a5275..14b62d7df942 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -484,7 +484,8 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) tp->saved_syn = NULL; } -struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, + const struct sk_buff *orig_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index f2acb2566333..cfcb10b75483 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -313,6 +313,7 @@ enum { TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ + TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 829eb208e80d6db95c0201cb8fa00c2f9ad87faf Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 31 Jul 2020 17:34:01 -0700 Subject: rtnetlink: add support for protodown reason netdev protodown is a mechanism that allows protocols to hold an interface down. It was initially introduced in the kernel to hold links down by a multihoming protocol. There was also an attempt to introduce protodown reason at the time but was rejected. protodown and protodown reason is supported by almost every switching and routing platform. It was ok for a while to live without a protodown reason. But, its become more critical now given more than one protocol may need to keep a link down on a system at the same time. eg: vrrp peer node, port security, multihoming protocol. Its common for Network operators and protocol developers to look for such a reason on a networking box (Its also known as errDisable by most networking operators) This patch adds support for link protodown reason attribute. There are two ways to maintain protodown reasons. (a) enumerate every possible reason code in kernel - A protocol developer has to make a request and have that appear in a certain kernel version (b) provide the bits in the kernel, and allow user-space (sysadmin or NOS distributions) to manage the bit-to-reasonname map. - This makes extending reason codes easier (kind of like the iproute2 table to vrf-name map /etc/iproute2/rt_tables.d/) This patch takes approach (b). a few things about the patch: - It treats the protodown reason bits as counter to indicate active protodown users - Since protodown attribute is already an exposed UAPI, the reason is not enforced on a protodown set. Its a no-op if not used. the patch follows the below algorithm: - presence of reason bits set indicates protodown is in use - user can set protodown and protodown reason in a single or multiple setlink operations - setlink operation to clear protodown, will return -EBUSY if there are active protodown reason bits - reason is not included in link dumps if not used example with patched iproute2: $cat /etc/iproute2/protodown_reasons.d/r.conf 0 mlag 1 evpn 2 vrrp 3 psecurity $ip link set dev vxlan0 protodown on protodown_reason vrrp on $ip link set dev vxlan0 protodown_reason mlag on $ip link show 14: vxlan0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether f6:06:be:17:91:e7 brd ff:ff:ff:ff:ff:ff protodown on $ip link set dev vxlan0 protodown_reason mlag off $ip link set dev vxlan0 protodown off protodown_reason vrrp off Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 10 ++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ac2cd3f49aba..ba0fa6b22787 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2058,6 +2058,8 @@ struct net_device { struct timer_list watchdog_timer; int watchdog_timeo; + u32 proto_down_reason; + struct list_head todo_list; int __percpu *pcpu_refcnt; @@ -3810,6 +3812,8 @@ int dev_get_port_parent_id(struct net_device *dev, bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); int dev_change_proto_down_generic(struct net_device *dev, bool proto_down); +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 63af64646358..7fba4de511de 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -170,12 +170,22 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -- cgit v1.2.3 From 73b11c2ab072d5b0599d1e12cc126f55ee306daf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Jul 2020 11:28:26 -0700 Subject: bpf: Add support for forced LINK_DETACH command Add LINK_DETACH command to force-detach bpf_link without destroying it. It has the same behavior as auto-detaching of bpf_link due to cgroup dying for bpf_cgroup_link or net_device being destroyed for bpf_xdp_link. In such case, bpf_link is still a valid kernel object, but is defuncts and doesn't hold BPF program attached to corresponding BPF hook. This functionality allows users with enough access rights to manually force-detach attached bpf_link without killing respective owner process. This patch implements LINK_DETACH for cgroup, xdp, and netns links, mostly re-using existing link release handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200731182830.286260-2-andriin@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 40c5e206ecf2..cef4ef0d2b4e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -793,6 +793,7 @@ struct bpf_link { struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); + int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eb5e0c38eb2c..b134e679e9db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_cmd { BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, BPF_ITER_CREATE, + BPF_LINK_DETACH, }; enum bpf_map_type { @@ -634,6 +635,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { + __u32 link_fd; + } link_detach; + struct { /* struct used by BPF_ENABLE_STATS command */ __u32 type; } enable_stats; -- cgit v1.2.3 From c83e2a6e2fbbb7d47ea46fd7cb1f01292f2d0b6e Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Fri, 17 Jul 2020 05:11:38 +0000 Subject: wilc1000: Move wilc1000 SDIO ID's from driver source to common header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moved macros used for Vendor/Device ID from wilc1000 driver to common header file and changed macro name for consistency with other macros. Signed-off-by: Ajay Singh Acked-by: Ulf Hansson Acked-by: Pali Rohár Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200717051134.19160-1-ajay.kathat@microchip.com --- include/linux/mmc/sdio_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 15ed8ce9d394..519820d18e62 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -105,6 +105,9 @@ #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 #define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668 +#define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 +#define SDIO_DEVICE_ID_MICROCHIP_WILC1000 0x5347 + #define SDIO_VENDOR_ID_SIANO 0x039a #define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201 #define SDIO_DEVICE_ID_SIANO_NICE 0x0202 -- cgit v1.2.3 From 4e56cde15f7d68cf86ff8efff8504497de152475 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 31 Jul 2020 21:38:30 +0300 Subject: mac80211: Handle special status codes in SAE commit SAE authentication has been extended with H2E (IEEE 802.11 REVmd) and PK (WFA) options. Those extensions use special status code values in the SAE commit messages (Authentication frame with transaction sequence number 1) to identify which extension is in use. mac80211 was interpreting those new values as the AP denying authentication and that resulted in failure to complete SAE authentication in some cases. Fix this by adding exceptions for the new status code values 126 and 127. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200731183830.18735-1-jouni@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9f732499ea88..c47f43e65a2f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2561,6 +2561,8 @@ enum ieee80211_statuscode { /* 802.11ai */ WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108, WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, + WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, + WLAN_STATUS_SAE_PK = 127, }; -- cgit v1.2.3 From 73f9407b3eb893bc8a82293cc8d4dfa3db079c0b Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 3 Aug 2020 10:33:04 +0300 Subject: netfilter: conntrack: Move nf_ct_offload_timeout to header file To be used by callers from other modules. [ Rename DAY to NF_CT_DAY to avoid possible symbol name pollution issue --Pablo ] Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 90690e37a56f..c7bfddfc65b0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) !nf_ct_is_dying(ct); } +#define NF_CT_DAY (86400 * HZ) + +/* Set an arbitrary timeout large enough not to ever expire, this save + * us a check for the IPS_OFFLOAD_BIT from the packet path via + * nf_ct_is_expired(). + */ +static inline void nf_ct_offload_timeout(struct nf_conn *ct) +{ + if (nf_ct_expires(ct) < NF_CT_DAY / 2) + ct->timeout = nfct_time_stamp + NF_CT_DAY; +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); -- cgit v1.2.3 From 966e50597666d530b69de2abb9c83ff0a9bd3ee6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Jul 2020 14:47:58 -0700 Subject: udp_tunnel: add the ability to hard-code IANA VXLAN mlx5 has the IANA VXLAN port (4789) hard coded by the device, instead of being added dynamically when tunnels are created. To support this add a workaround flag to struct udp_tunnel_nic_info. Skipping updates for the port is fairly trivial, dumping the hard coded port via ethtool requires some code duplication. The port is not a part of any real table, we dump it in a special table which has no tunnel types supported and only one entry. This is the last known workaround / hack needed to convert all drivers to the new infra. Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- include/net/udp_tunnel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index dd20ce99740c..94bb7a882250 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -193,6 +193,11 @@ enum udp_tunnel_nic_info_flags { UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), /* Device supports only IPv4 tunnels */ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), + /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. + * This port must not be counted towards n_entries of any table. + * Driver will not receive any callback associated with port 4789. + */ + UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), }; /** -- cgit v1.2.3 From bb3831294cd50750806f2ce8d73317dc8feeda09 Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Thu, 30 Jul 2020 21:57:48 +0200 Subject: net: mdiobus: add reset-post-delay-us handling Load new "reset-post-delay-us" value from MDIO properties, and if configured to a greater then zero delay do a flexible sleeping delay after MDIO bus reset deassert. This allows devices to exit reset state before start bus communication. Signed-off-by: Bruno Thomsen Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0403eb799913..3a09d2bf69ea 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -293,6 +293,8 @@ struct mii_bus { /* GPIO reset pulse width in microseconds */ int reset_delay_us; + /* GPIO reset deassert delay in microseconds */ + int reset_post_delay_us; /* RESET GPIO descriptor pointer */ struct gpio_desc *reset_gpiod; -- cgit v1.2.3 From 038ebb1a713d114d54dbf14868a73181c0c92758 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 31 Jul 2020 10:45:01 +0800 Subject: net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct When openvswitch conntrack offload with act_ct action. Fragment packets defrag in the ingress tc act_ct action and miss the next chain. Then the packet pass to the openvswitch datapath without the mru. The over mtu packet will be dropped in output action in openvswitch for over mtu. "kernel: net2: dropped over-mtu packet: 1528 > 1500" This patch add mru in the tc_skb_ext for adefrag and miss next chain situation. And also add mru in the qdisc_skb_cb. The act_ct set the mru to the qdisc_skb_cb when the packet defrag. And When the chain miss, The mru is set to tc_skb_ext which can be got by ovs datapath. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/sch_generic.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fa817a105517..3ad65d4ce085 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -283,6 +283,7 @@ struct nf_bridge_info { */ struct tc_skb_ext { __u32 chain; + __u16 mru; }; #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c510b03b9751..d60e7c39d60c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -384,6 +384,7 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; + u16 mru; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -463,7 +464,7 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; - BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); BUILD_BUG_ON(sizeof(qcb->data) < sz); } -- cgit v1.2.3 From 9d2f627b7ec9d5d3246b6cec17f290ee6778c83b Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:20:56 +0200 Subject: net: openvswitch: add masks cache hit counter Add a counter that counts the number of masks cache hits, and export it through the megaflow netlink statistics. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9b14519e74d9..7cb76e5ca7cf 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -102,8 +102,8 @@ struct ovs_dp_megaflow_stats { __u64 n_mask_hit; /* Number of masks used for flow lookups. */ __u32 n_masks; /* Number of masks for the datapath. */ __u32 pad0; /* Pad for future expension. */ + __u64 n_cache_hit; /* Number of cache matches for flow lookups. */ __u64 pad1; /* Pad for future expension. */ - __u64 pad2; /* Pad for future expension. */ }; struct ovs_vport_stats { -- cgit v1.2.3 From 9bf24f594c6acf676fb8c229f152c21bfb915ddb Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:21:34 +0200 Subject: net: openvswitch: make masks cache size configurable This patch makes the masks cache size configurable, or with a size of 0, disable it. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7cb76e5ca7cf..8300cc29dec8 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -86,6 +86,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, + OVS_DP_ATTR_MASKS_CACHE_SIZE, __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From 88fab21c691bb1ff164e540735237a385e3afeaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ioana-Ruxandra=20St=C4=83ncioi?= Date: Mon, 3 Aug 2020 07:33:33 +0000 Subject: seg6_iptunnel: Refactor seg6_lwt_headroom out of uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the function seg6_lwt_headroom out of the seg6_iptunnel.h uapi header, because it is only used in seg6_iptunnel.c. Moreover, it is only used in the kernel code, as indicated by the "#ifdef __KERNEL__". Suggested-by: David Miller Signed-off-by: Ioana-Ruxandra Stăncioi Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index 09fb608a35ec..eb815e0d0ac3 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -37,25 +37,4 @@ enum { SEG6_IPTUN_MODE_L2ENCAP, }; -#ifdef __KERNEL__ - -static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) -{ - int head = 0; - - switch (tuninfo->mode) { - case SEG6_IPTUN_MODE_INLINE: - break; - case SEG6_IPTUN_MODE_ENCAP: - head = sizeof(struct ipv6hdr); - break; - case SEG6_IPTUN_MODE_L2ENCAP: - return 0; - } - - return ((tuninfo->srh->hdrlen + 1) << 3) + head; -} - -#endif - #endif -- cgit v1.2.3 From 08e335f6ad35a019f4cb1a74badc2f4bceb63bcf Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 3 Aug 2020 19:11:33 +0300 Subject: devlink: Add early_drop trap Add the packet trap that can report packets that were ECN marked due to RED AQM. Signed-off-by: Amit Cohen Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0606967cb501..fd3ae0760492 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -703,6 +703,7 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_PTP_GENERAL, DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_SAMPLE, DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_TRAP, + DEVLINK_TRAP_GENERIC_ID_EARLY_DROP, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -891,6 +892,8 @@ enum devlink_trap_group_generic_id { "flow_action_sample" #define DEVLINK_TRAP_GENERIC_NAME_FLOW_ACTION_TRAP \ "flow_action_trap" +#define DEVLINK_TRAP_GENERIC_NAME_EARLY_DROP \ + "early_drop" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" -- cgit v1.2.3 From c88e11e04716ab4ed51d5972ea04c7b70b6e9d8a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 3 Aug 2020 19:11:34 +0300 Subject: devlink: Pass extack when setting trap's action and group's parameters A later patch will refuse to set the action of certain traps in mlxsw and also to change the policer binding of certain groups. Pass extack so that failure could be communicated clearly to user space. Reviewed-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index fd3ae0760492..8f3c8a443238 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1077,7 +1077,8 @@ struct devlink_ops { */ int (*trap_action_set)(struct devlink *devlink, const struct devlink_trap *trap, - enum devlink_trap_action action); + enum devlink_trap_action action, + struct netlink_ext_ack *extack); /** * @trap_group_init: Trap group initialization function. * @@ -1094,7 +1095,8 @@ struct devlink_ops { */ int (*trap_group_set)(struct devlink *devlink, const struct devlink_trap_group *group, - const struct devlink_trap_policer *policer); + const struct devlink_trap_policer *policer, + struct netlink_ext_ack *extack); /** * @trap_policer_init: Trap policer initialization function. * -- cgit v1.2.3 From 6c84a589972f6458da3168a68e7d65f6ca8687f8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 3 Aug 2020 13:03:52 -0700 Subject: net: dsa: loop: Move data structures to header In preparation for adding support for a mockup data path, move the driver data structures to include/linux/dsa/loop.h such that we can share them between net/dsa/ and drivers/net/dsa/ later on. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/loop.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/dsa/loop.h (limited to 'include') diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h new file mode 100644 index 000000000000..bb39401a8056 --- /dev/null +++ b/include/linux/dsa/loop.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef DSA_LOOP_H +#define DSA_LOOP_H + +#include +#include +#include + +struct dsa_loop_vlan { + u16 members; + u16 untagged; +}; + +struct dsa_loop_mib_entry { + char name[ETH_GSTRING_LEN]; + unsigned long val; +}; + +enum dsa_loop_mib_counters { + DSA_LOOP_PHY_READ_OK, + DSA_LOOP_PHY_READ_ERR, + DSA_LOOP_PHY_WRITE_OK, + DSA_LOOP_PHY_WRITE_ERR, + __DSA_LOOP_CNT_MAX, +}; + +struct dsa_loop_port { + struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX]; + u16 pvid; +}; + +struct dsa_loop_priv { + struct mii_bus *bus; + unsigned int port_base; + struct dsa_loop_vlan vlans[VLAN_N_VID]; + struct net_device *netdev; + struct dsa_loop_port ports[DSA_MAX_PORTS]; +}; + +#endif /* DSA_LOOP_H */ -- cgit v1.2.3 From c99194eded25deb58f606e59b3101ba05e786021 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 3 Aug 2020 13:03:53 -0700 Subject: net: dsa: loop: Wire-up MTU callbacks For now we simply store the port MTU into a per-port member. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/dsa/loop.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h index bb39401a8056..5a3470bcc8a7 100644 --- a/include/linux/dsa/loop.h +++ b/include/linux/dsa/loop.h @@ -27,6 +27,7 @@ enum dsa_loop_mib_counters { struct dsa_loop_port { struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX]; u16 pvid; + int mtu; }; struct dsa_loop_priv { -- cgit v1.2.3 From df23bb18b44b9a1f2b54358201730e710a9df57f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 4 Aug 2020 07:53:42 +0200 Subject: ipv4: route: Ignore output interface in FIB lookup for PMTU route Currently, processes sending traffic to a local bridge with an encapsulation device as a port don't get ICMP errors if they exceed the PMTU of the encapsulated link. David Ahern suggested this as a hack, but it actually looks like the correct solution: when we update the PMTU for a given destination by means of updating or creating a route exception, the encapsulation might trigger this because of PMTU discovery happening either on the encapsulation device itself, or its lower layer. This happens on bridged encapsulations only. The output interface shouldn't matter, because we already have a valid destination. Drop the output interface restriction from the associated route lookup. For UDP tunnels, we will now have a route exception created for the encapsulation itself, with a MTU value reflecting its headroom, which allows a bridge forwarding IP packets originated locally to deliver errors back to the sending socket. The behaviour is now consistent with IPv6 and verified with selftests pmtu_ipv{4,6}_br_{geneve,vxlan}{4,6}_exception introduced later in this series. v2: - reset output interface only for bridge ports (David Ahern) - add and use netif_is_any_bridge_port() helper (David Ahern) Suggested-by: David Ahern Signed-off-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88d40b9abaa1..90444622b703 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4840,6 +4840,11 @@ static inline bool netif_is_ovs_port(const struct net_device *dev) return dev->priv_flags & IFF_OVS_DATAPATH; } +static inline bool netif_is_any_bridge_port(const struct net_device *dev) +{ + return netif_is_bridge_port(dev) || netif_is_ovs_port(dev); +} + static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; -- cgit v1.2.3 From 4cb47a8644cc9eb8ec81190a50e79e6530d0297f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 4 Aug 2020 07:53:43 +0200 Subject: tunnels: PMTU discovery support for directly bridged IP packets It's currently possible to bridge Ethernet tunnels carrying IP packets directly to external interfaces without assigning them addresses and routes on the bridged network itself: this is the case for UDP tunnels bridged with a standard bridge or by Open vSwitch. PMTU discovery is currently broken with those configurations, because the encapsulation effectively decreases the MTU of the link, and while we are able to account for this using PMTU discovery on the lower layer, we don't have a way to relay ICMP or ICMPv6 messages needed by the sender, because we don't have valid routes to it. On the other hand, as a tunnel endpoint, we can't fragment packets as a general approach: this is for instance clearly forbidden for VXLAN by RFC 7348, section 4.3: VTEPs MUST NOT fragment VXLAN packets. Intermediate routers may fragment encapsulated VXLAN packets due to the larger frame size. The destination VTEP MAY silently discard such VXLAN fragments. The same paragraph recommends that the MTU over the physical network accomodates for encapsulations, but this isn't a practical option for complex topologies, especially for typical Open vSwitch use cases. Further, it states that: Other techniques like Path MTU discovery (see [RFC1191] and [RFC1981]) MAY be used to address this requirement as well. Now, PMTU discovery already works for routed interfaces, we get route exceptions created by the encapsulation device as they receive ICMP Fragmentation Needed and ICMPv6 Packet Too Big messages, and we already rebuild those messages with the appropriate MTU and route them back to the sender. Add the missing bits for bridged cases: - checks in skb_tunnel_check_pmtu() to understand if it's appropriate to trigger a reply according to RFC 1122 section 3.2.2 for ICMP and RFC 4443 section 2.4 for ICMPv6. This function is already called by UDP tunnels - a new function generating those ICMP or ICMPv6 replies. We can't reuse icmp_send() and icmp6_send() as we don't see the sender as a valid destination. This doesn't need to be generic, as we don't cover any other type of ICMP errors given that we only provide an encapsulation function to the sender While at it, make the MTU check in skb_tunnel_check_pmtu() accurate: we might receive GSO buffers here, and the passed headroom already includes the inner MAC length, so we don't have to account for it a second time (that would imply three MAC headers on the wire, but there are just two). This issue became visible while bridging IPv6 packets with 4500 bytes of payload over GENEVE using IPv4 with a PMTU of 4000. Given the 50 bytes of encapsulation headroom, we would advertise MTU as 3950, and we would reject fragmented IPv6 datagrams of 3958 bytes size on the wire. We're exclusively dealing with network MTU here, though, so we could get Ethernet frames up to 3964 octets in that case. v2: - moved skb_tunnel_check_pmtu() to ip_tunnel_core.c (David Ahern) - split IPv4/IPv6 functions (David Ahern) Signed-off-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/dst.h | 10 ---------- include/net/ip_tunnels.h | 2 ++ 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 852d8fb36ab7..6ae2e625050d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -535,14 +535,4 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } -static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, - struct dst_entry *encap_dst, - int headroom) -{ - u32 encap_mtu = dst_mtu(encap_dst); - - if (skb->len > encap_mtu - headroom) - skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom); -} - #endif /* _NET_DST_H */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 36025dea7612..02ccd32542d0 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -420,6 +420,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); +int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, + int headroom, bool reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); -- cgit v1.2.3 From 81f6cb31222d286dab65579d61f96664eeb99e0b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Aug 2020 23:34:46 +0800 Subject: ipv6: add ipv6_dev_find() This is to add an ip_dev_find like function for ipv6, used to find the dev by saddr. It will be used by TIPC protocol. So also export it. Signed-off-by: Xin Long Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8418b7d38468..ba3f6c15ad2b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -97,6 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr, int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr); + struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict); -- cgit v1.2.3