From 8aa26c575fb343ebde810b30dad0cba7d8121efb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 18 Aug 2020 10:17:33 +0200
Subject: netlink: make NLA_BINARY validation more flexible

Add range validation for NLA_BINARY, allowing validation of any
combination of combination minimum or maximum lengths, using the
existing NLA_POLICY_RANGE()/NLA_POLICY_FULL_RANGE() macros, just
like for integers where the value is checked.

Also make NLA_POLICY_EXACT_LEN(), NLA_POLICY_EXACT_LEN_WARN()
and NLA_POLICY_MIN_LEN() special cases of this, removing the old
types NLA_EXACT_LEN and NLA_MIN_LEN.

This allows us to save some code where both minimum and maximum
lengths are requires, currently the policy only allows maximum
(NLA_BINARY), minimum (NLA_MIN_LEN) or exact (NLA_EXACT_LEN), so
a range of lengths cannot be accepted and must be checked by the
code that consumes the attributes later.

Also, this allows advertising the correct ranges in the policy
export to userspace. Here, NLA_MIN_LEN and NLA_EXACT_LEN already
were special cases of NLA_BINARY with min and min/max length
respectively.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 58 +++++++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index c0411f14fb53..fdd317f8fde4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -181,8 +181,6 @@ enum {
 	NLA_S64,
 	NLA_BITFIELD32,
 	NLA_REJECT,
-	NLA_EXACT_LEN,
-	NLA_MIN_LEN,
 	__NLA_TYPE_MAX,
 };
 
@@ -199,11 +197,11 @@ struct netlink_range_validation_signed {
 enum nla_policy_validation {
 	NLA_VALIDATE_NONE,
 	NLA_VALIDATE_RANGE,
+	NLA_VALIDATE_RANGE_WARN_TOO_LONG,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
-	NLA_VALIDATE_WARN_TOO_LONG,
 };
 
 /**
@@ -222,7 +220,7 @@ enum nla_policy_validation {
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
  *    NLA_BINARY           Maximum length of attribute payload
- *    NLA_MIN_LEN          Minimum length of attribute payload
+ *                         (but see also below with the validation type)
  *    NLA_NESTED,
  *    NLA_NESTED_ARRAY     Length verification is done by checking len of
  *                         nested header (or empty); len field is used if
@@ -237,11 +235,6 @@ enum nla_policy_validation {
  *                         just like "All other"
  *    NLA_BITFIELD32       Unused
  *    NLA_REJECT           Unused
- *    NLA_EXACT_LEN        Attribute should have exactly this length, otherwise
- *                         it is rejected or warned about, the latter happening
- *                         if and only if the `validation_type' is set to
- *                         NLA_VALIDATE_WARN_TOO_LONG.
- *    NLA_MIN_LEN          Minimum length of attribute payload
  *    All other            Minimum length of attribute payload
  *
  * Meaning of validation union:
@@ -296,6 +289,11 @@ enum nla_policy_validation {
  *                         pointer to a struct netlink_range_validation_signed
  *                         that indicates the min/max values.
  *                         Use NLA_POLICY_FULL_RANGE_SIGNED().
+ *
+ *    NLA_BINARY           If the validation type is like the ones for integers
+ *                         above, then the min/max length (not value like for
+ *                         integers) of the attribute is enforced.
+ *
  *    All other            Unused - but note that it's a union
  *
  * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
@@ -309,7 +307,7 @@ enum nla_policy_validation {
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
  *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- *	[ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
+ *	[ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)),
  *	[ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
  * };
  */
@@ -335,9 +333,10 @@ struct nla_policy {
 		 * nesting validation starts here.
 		 *
 		 * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
-		 * for any types >= this, so need to use NLA_MIN_LEN to get the
-		 * previous pure { .len = xyz } behaviour. The advantage of this
-		 * is that types not specified in the policy will be rejected.
+		 * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to
+		 * get the previous pure { .len = xyz } behaviour. The advantage
+		 * of this is that types not specified in the policy will be
+		 * rejected.
 		 *
 		 * For completely new families it should be set to 1 so that the
 		 * validation is enforced for all attributes. For existing ones
@@ -349,12 +348,6 @@ struct nla_policy {
 	};
 };
 
-#define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len) \
-	{ .type = NLA_EXACT_LEN, .len = _len, \
-	  .validation_type = NLA_VALIDATE_WARN_TOO_LONG, }
-#define NLA_POLICY_MIN_LEN(_len)	{ .type = NLA_MIN_LEN, .len = _len }
-
 #define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
 #define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
 
@@ -370,19 +363,21 @@ struct nla_policy {
 	{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
-#define NLA_ENSURE_UINT_TYPE(tp)			\
+#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 ||	\
 		      tp == NLA_U32 || tp == NLA_U64 ||	\
-		      tp == NLA_MSECS) + tp)
+		      tp == NLA_MSECS ||		\
+		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_SINT_TYPE(tp)			\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16  ||	\
 		      tp == NLA_S32 || tp == NLA_S64) + tp)
-#define NLA_ENSURE_INT_TYPE(tp)				\
+#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
 		      tp == NLA_S32 || tp == NLA_U32 ||	\
 		      tp == NLA_S64 || tp == NLA_U64 ||	\
-		      tp == NLA_MSECS) + tp)
+		      tp == NLA_MSECS ||		\
+		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_NO_VALIDATION_PTR(tp)		\
 	(__NLA_ENSURE(tp != NLA_BITFIELD32 &&		\
 		      tp != NLA_REJECT &&		\
@@ -390,14 +385,14 @@ struct nla_policy {
 		      tp != NLA_NESTED_ARRAY) + tp)
 
 #define NLA_POLICY_RANGE(tp, _min, _max) {		\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_RANGE,		\
 	.min = _min,					\
 	.max = _max					\
 }
 
 #define NLA_POLICY_FULL_RANGE(tp, _range) {		\
-	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_RANGE_PTR,	\
 	.range = _range,				\
 }
@@ -409,13 +404,13 @@ struct nla_policy {
 }
 
 #define NLA_POLICY_MIN(tp, _min) {			\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_MIN,		\
 	.min = _min,					\
 }
 
 #define NLA_POLICY_MAX(tp, _max) {			\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_MAX,		\
 	.max = _max,					\
 }
@@ -427,6 +422,15 @@ struct nla_policy {
 	.len = __VA_ARGS__ + 0,				\
 }
 
+#define NLA_POLICY_EXACT_LEN(_len)	NLA_POLICY_RANGE(NLA_BINARY, _len, _len)
+#define NLA_POLICY_EXACT_LEN_WARN(_len) {			\
+	.type = NLA_BINARY,					\
+	.validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG,	\
+	.min = _len,						\
+	.max = _len						\
+}
+#define NLA_POLICY_MIN_LEN(_len)	NLA_POLICY_MIN(NLA_BINARY, _len)
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
-- 
cgit v1.2.3


From b558b6c24068d87ffcd95dad3bf0d9bd2ac290e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 18 Aug 2020 18:07:09 -0700
Subject: net-tun: Add type safety to tun_xdp_to_ptr() and tun_ptr_to_xdp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reduces likelihood of incorrect use.

Test: builds

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200819010710.3959310-1-zenczykowski@gmail.com
---
 include/linux/if_tun.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 5bda8cf457b6..6c37e1dbc5df 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -28,8 +28,8 @@ struct tun_xdp_hdr {
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
 bool tun_is_xdp_frame(void *ptr);
-void *tun_xdp_to_ptr(void *ptr);
-void *tun_ptr_to_xdp(void *ptr);
+void *tun_xdp_to_ptr(struct xdp_frame *xdp);
+struct xdp_frame *tun_ptr_to_xdp(void *ptr);
 void tun_ptr_free(void *ptr);
 #else
 #include <linux/err.h>
@@ -48,11 +48,11 @@ static inline bool tun_is_xdp_frame(void *ptr)
 {
 	return false;
 }
-static inline void *tun_xdp_to_ptr(void *ptr)
+static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
 {
 	return NULL;
 }
-static inline void *tun_ptr_to_xdp(void *ptr)
+static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 596b5ef458f903d4362fb3c69967b6d8a23334bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 18 Aug 2020 18:07:10 -0700
Subject: net-tun: Eliminate two tun/xdp related function calls from vhost-net
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This provides a minor performance boost by virtue of inlining
instead of cross module function calls.

Test: builds

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200819010710.3959310-2-zenczykowski@gmail.com
---
 include/linux/if_tun.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 6c37e1dbc5df..2a7660843444 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -27,9 +27,18 @@ struct tun_xdp_hdr {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-bool tun_is_xdp_frame(void *ptr);
-void *tun_xdp_to_ptr(struct xdp_frame *xdp);
-struct xdp_frame *tun_ptr_to_xdp(void *ptr);
+static inline bool tun_is_xdp_frame(void *ptr)
+{
+       return (unsigned long)ptr & TUN_XDP_FLAG;
+}
+static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
+{
+       return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
+}
+static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
+{
+       return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+}
 void tun_ptr_free(void *ptr);
 #else
 #include <linux/err.h>
-- 
cgit v1.2.3


From bdfbb63c314a6fb7d27dddd62f5a3e4f062b92bb Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Tue, 18 Aug 2020 12:32:43 +0200
Subject: ptp: Add generic ptp v2 header parsing function

Reason: A lot of the ptp drivers - which implement hardware time stamping - need
specific fields such as the sequence id from the ptp v2 header. Currently all
drivers implement that themselves.

Introduce a generic function to retrieve a pointer to the start of the ptp v2
header.

Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index dd00fa41f7e7..0a9cc0eb0801 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -44,6 +44,30 @@
 #define OFF_IHL		14
 #define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2)
 
+struct clock_identity {
+	u8 id[8];
+} __packed;
+
+struct port_identity {
+	struct clock_identity	clock_identity;
+	__be16			port_number;
+} __packed;
+
+struct ptp_header {
+	u8			tsmt;  /* transportSpecific | messageType */
+	u8			ver;   /* reserved          | versionPTP  */
+	__be16			message_length;
+	u8			domain_number;
+	u8			reserved1;
+	u8			flag_field[2];
+	__be64			correction;
+	__be32			reserved2;
+	struct port_identity	source_port_identity;
+	__be16			sequence_id;
+	u8			control;
+	u8			log_message_interval;
+} __packed;
+
 #if defined(CONFIG_NET_PTP_CLASSIFY)
 /**
  * ptp_classify_raw - classify a PTP packet
@@ -57,6 +81,21 @@
  */
 unsigned int ptp_classify_raw(const struct sk_buff *skb);
 
+/**
+ * ptp_parse_header - Get pointer to the PTP v2 header
+ * @skb: packet buffer
+ * @type: type of the packet (see ptp_classify_raw())
+ *
+ * This function takes care of the VLAN, UDP, IPv4 and IPv6 headers. The length
+ * is checked.
+ *
+ * Note, internally skb_mac_header() is used. Make sure that the @skb is
+ * initialized accordingly.
+ *
+ * Return: Pointer to the ptp v2 header or NULL if not found
+ */
+struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type);
+
 void __init ptp_classifier_init(void);
 #else
 static inline void ptp_classifier_init(void)
@@ -66,5 +105,10 @@ static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
 {
 	return PTP_CLASS_NONE;
 }
+static inline struct ptp_header *ptp_parse_header(struct sk_buff *skb,
+						  unsigned int type)
+{
+	return NULL;
+}
 #endif
 #endif /* _PTP_CLASSIFY_H_ */
-- 
cgit v1.2.3


From 036c508ba95e573f53bd5bbb79b0c4d71003b319 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Tue, 18 Aug 2020 12:32:44 +0200
Subject: ptp: Add generic ptp message type function

The message type is located at different offsets within the ptp header depending
on the ptp version (v1 or v2). Therefore, drivers which also deal with ptp v1
have some code for it.

Extract this into a helper function for drivers to be used.

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 0a9cc0eb0801..cfc6d9152f69 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -96,6 +96,31 @@ unsigned int ptp_classify_raw(const struct sk_buff *skb);
  */
 struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type);
 
+/**
+ * ptp_get_msgtype - Extract ptp message type from given header
+ * @hdr: ptp header
+ * @type: type of the packet (see ptp_classify_raw())
+ *
+ * This function returns the message type for a given ptp header. It takes care
+ * of the different ptp header versions (v1 or v2).
+ *
+ * Return: The message type
+ */
+static inline u8 ptp_get_msgtype(const struct ptp_header *hdr,
+				 unsigned int type)
+{
+	u8 msgtype;
+
+	if (unlikely(type & PTP_CLASS_V1)) {
+		/* msg type is located at the control field for ptp v1 */
+		msgtype = hdr->control;
+	} else {
+		msgtype = hdr->tsmt & 0x0f;
+	}
+
+	return msgtype;
+}
+
 void __init ptp_classifier_init(void);
 #else
 static inline void ptp_classifier_init(void)
-- 
cgit v1.2.3


From 17060fb5069f2c73a566015ce6e019d5685680b5 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Tue, 18 Aug 2020 12:32:51 +0200
Subject: ptp: Remove unused macro

The offset for the control field is not needed anymore. Remove it.

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index cfc6d9152f69..8437307cca8c 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -36,7 +36,6 @@
 
 #define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
 #define OFF_PTP_SEQUENCE_ID	30
-#define OFF_PTP_CONTROL		32 /* PTPv1 only */
 
 /* Below defines should actually be removed at some point in time. */
 #define IP6_HLEN	40
-- 
cgit v1.2.3


From 005142b8a1f0f32d33fbe04b728464c1b7acfa0e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 18 Aug 2020 21:27:56 -0700
Subject: bpf: Factor out bpf_link_by_id() helper.

Refactor the code a bit to extract bpf_link_by_id() helper.
It's similar to existing bpf_prog_by_id().

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200819042759.51280-2-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 55f694b63164..a9b7185a6b37 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1358,6 +1358,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
 			 struct btf *btf, const struct btf_type *t);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
+struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
 #else /* !CONFIG_BPF_SYSCALL */
-- 
cgit v1.2.3


From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 21 Aug 2020 11:44:18 -0700
Subject: bpf: Implement link_query for bpf iterators

This patch implemented bpf_link callback functions
show_fdinfo and fill_link_info to support link_query
interface.

The general interface for show_fdinfo and fill_link_info
will print/fill the target_name. Each targets can
register show_fdinfo and fill_link_info callbacks
to print/fill more target specific information.

For example, the below is a fdinfo result for a bpf
task iterator.
  $ cat /proc/1749/fdinfo/7
  pos:    0
  flags:  02000000
  mnt_id: 14
  link_type:      iter
  link_id:        11
  prog_tag:       990e1f8152f7e54f
  prog_id:        59
  target_name:    task

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com
---
 include/linux/bpf.h      | 6 ++++++
 include/uapi/linux/bpf.h | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9b7185a6b37..529e9b183eeb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1218,12 +1218,18 @@ typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
 					union bpf_iter_link_info *linfo,
 					struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
+					struct seq_file *seq);
+typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
+					 struct bpf_link_info *info);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
 	bpf_iter_attach_target_t attach_target;
 	bpf_iter_detach_target_t detach_target;
+	bpf_iter_show_fdinfo_t show_fdinfo;
+	bpf_iter_fill_link_info_t fill_link_info;
 	u32 ctx_arg_info_size;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0480f893facd..a1bbaff7a0af 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4071,6 +4071,13 @@ struct bpf_link_info {
 			__u64 cgroup_id;
 			__u32 attach_type;
 		} cgroup;
+		struct {
+			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
+			__u32 target_name_len;	   /* in/out: target_name buffer len */
+			union {
+				__u32 map_id;
+			} map;
+		} iter;
 		struct  {
 			__u32 netns_ino;
 			__u32 attach_type;
-- 
cgit v1.2.3


From b76f22269028fb252727a696084c70494d80a52c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 21 Aug 2020 11:44:19 -0700
Subject: bpf: Implement link_query callbacks in map element iterators

For bpf_map_elem and bpf_sk_local_storage bpf iterators,
additional map_id should be shown for fdinfo and
userspace query. For example, the following is for
a bpf_map_elem iterator.
  $ cat /proc/1753/fdinfo/9
  pos:    0
  flags:  02000000
  mnt_id: 14
  link_type:      iter
  link_id:        34
  prog_tag:       104be6d3fe45e6aa
  prog_id:        173
  target_name:    bpf_map_elem
  map_id: 127

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200821184419.574240-1-yhs@fb.com
---
 include/linux/bpf.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 529e9b183eeb..30c144af894a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1256,6 +1256,10 @@ int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
+void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
+			      struct seq_file *seq);
+int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
+				struct bpf_link_info *info);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
-- 
cgit v1.2.3


From 7b219da43f94a3b4d5a8aa4cc52b75b34f0301ec Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 21 Aug 2020 11:29:43 +0100
Subject: net: sk_msg: Simplify sk_psock initialization

Initializing psock->sk_proto and other saved callbacks is only
done in sk_psock_update_proto, after sk_psock_init has returned.
The logic for this is difficult to follow, and needlessly complex.

Instead, initialize psock->sk_proto whenever we allocate a new
psock. Additionally, assert the following invariants:

* The SK has no ULP: ULP does it's own finagling of sk->sk_prot
* sk_user_data is unused: we need it to store sk_psock

Protect our access to sk_user_data with sk_callback_lock, which
is what other users like reuseport arrays, etc. do.

The result is that an sk_psock is always fully initialized, and
that psock->sk_proto is always the "original" struct proto.
The latter allows us to use psock->sk_proto when initializing
IPv6 TCP / UDP callbacks for sockmap.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200821102948.21918-2-lmb@cloudflare.com
---
 include/linux/skmsg.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 1e9ed840b9fc..3119928fc103 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
 					 struct sk_psock *psock,
 					 struct proto *ops)
 {
-	/* Initialize saved callbacks and original proto only once, since this
-	 * function may be called multiple times for a psock, e.g. when
-	 * psock->progs.msg_parser is updated.
-	 *
-	 * Since we've not installed the new proto, psock is not yet in use and
-	 * we can initialize it without synchronization.
-	 */
-	if (!psock->sk_proto) {
-		struct proto *orig = READ_ONCE(sk->sk_prot);
-
-		psock->saved_unhash = orig->unhash;
-		psock->saved_close = orig->close;
-		psock->saved_write_space = sk->sk_write_space;
-
-		psock->sk_proto = orig;
-	}
-
 	/* Pairs with lockless read in sk_clone_lock() */
 	WRITE_ONCE(sk->sk_prot, ops);
 }
-- 
cgit v1.2.3


From 13b79d3ffbb8add9e2a6d604db2b49f241b97303 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 21 Aug 2020 11:29:45 +0100
Subject: bpf: sockmap: Call sock_map_update_elem directly

Don't go via map->ops to call sock_map_update_elem, since we know
what function to call in bpf_map_update_value. Since we currently
don't allow calling map_update_elem from BPF context, we can remove
ops->map_update_elem and rename the function to sock_map_update_elem_sys.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200821102948.21918-4-lmb@cloudflare.com
---
 include/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 30c144af894a..81f38e2fda78 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1648,6 +1648,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 			 struct bpf_prog *old, u32 which);
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -1669,6 +1670,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
+					   u64 flags)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_BPF_STREAM_PARSER */
 
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-- 
cgit v1.2.3


From eee049c0ef5b5b433f36841801e34c21c9f82a23 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Sat, 22 Aug 2020 15:59:08 +0100
Subject: l2tp: remove tunnel and session debug flags field

The l2tp subsystem now uses standard kernel logging APIs for
informational and warning messages, and tracepoints for debug
information.

Now that the tunnel and session debug flags are unused, remove the field
from the core structures.

Various system calls (in the case of l2tp_ppp) and netlink messages
handle the getting and setting of debug flags.  To avoid userspace
breakage don't modify the API of these calls; simply ignore set
requests, and send dummy data for get requests.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_pppol2tp.h | 2 +-
 include/uapi/linux/l2tp.h        | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h
index 060b4d1f3129..a91044328bc9 100644
--- a/include/uapi/linux/if_pppol2tp.h
+++ b/include/uapi/linux/if_pppol2tp.h
@@ -75,7 +75,7 @@ struct pppol2tpv3in6_addr {
 };
 
 /* Socket options:
- * DEBUG	- bitmask of debug message categories
+ * DEBUG	- bitmask of debug message categories (not used)
  * SENDSEQ	- 0 => don't send packets with sequence numbers
  *		  1 => send packets with sequence numbers
  * RECVSEQ	- 0 => receive packet sequence numbers are optional
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 61158f5a1a5b..88a0d32b8c07 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -108,7 +108,7 @@ enum {
 	L2TP_ATTR_VLAN_ID,		/* u16 (not used) */
 	L2TP_ATTR_COOKIE,		/* 0, 4 or 8 bytes */
 	L2TP_ATTR_PEER_COOKIE,		/* 0, 4 or 8 bytes */
-	L2TP_ATTR_DEBUG,		/* u32, enum l2tp_debug_flags */
+	L2TP_ATTR_DEBUG,		/* u32, enum l2tp_debug_flags (not used) */
 	L2TP_ATTR_RECV_SEQ,		/* u8 */
 	L2TP_ATTR_SEND_SEQ,		/* u8 */
 	L2TP_ATTR_LNS_MODE,		/* u8 */
@@ -177,7 +177,9 @@ enum l2tp_seqmode {
 };
 
 /**
- * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions
+ * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions.
+ *
+ * Unused.
  *
  * @L2TP_MSG_DEBUG: verbose debug (if compiled in)
  * @L2TP_MSG_CONTROL: userspace - kernel interface
-- 
cgit v1.2.3


From 70a217f1976f75a6cfe8223e5669ad7b405daaad Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:14 -0700
Subject: tcp: Use a struct to represent a saved_syn

The TCP_SAVE_SYN has both the network header and tcp header.
The total length of the saved syn packet is currently stored in
the first 4 bytes (u32) of an array and the actual packet data is
stored after that.

A later patch will add a bpf helper that allows to get the tcp header
alone from the saved syn without the network header.  It will be more
convenient to have a direct offset to a specific header instead of
re-parsing it.  This requires to separately store the network hdrlen.
The total header length (i.e. network + tcp) is still needed for the
current usage in getsockopt.  Although this total length can be obtained
by looking into the tcphdr and then get the (th->doff << 2), this patch
chooses to directly store the tcp hdrlen in the second four bytes of
this newly created "struct saved_syn".  By using a new struct, it can
give a readable name to each individual header length.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190014.2883694-1-kafai@fb.com
---
 include/linux/tcp.h        | 7 ++++++-
 include/net/request_sock.h | 8 +++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14b62d7df942..2088d5a079af 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -406,7 +406,7 @@ struct tcp_sock {
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
 	struct request_sock __rcu *fastopen_rsk;
-	u32	*saved_syn;
+	struct saved_syn *saved_syn;
 };
 
 enum tsq_enum {
@@ -484,6 +484,11 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 	tp->saved_syn = NULL;
 }
 
+static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
+{
+	return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
 					       const struct sk_buff *orig_skb);
 
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b2eb8b4ba697..7d9ed99a77bd 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -41,6 +41,12 @@ struct request_sock_ops {
 
 int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
 
+struct saved_syn {
+	u32 network_hdrlen;
+	u32 tcp_hdrlen;
+	u8 data[];
+};
+
 /* struct request_sock - mini sock to represent a connection request
  */
 struct request_sock {
@@ -60,7 +66,7 @@ struct request_sock {
 	struct timer_list		rsk_timer;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
-	u32				*saved_syn;
+	struct saved_syn		*saved_syn;
 	u32				secid;
 	u32				peer_secid;
 };
-- 
cgit v1.2.3


From 2b8ee4f05d4f6a6c427ad30dd6c1bb49eb2efd3b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:21 -0700
Subject: tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt

This change is mostly from an internal patch and adapts it from sysctl
config to the bpf_setsockopt setup.

The bpf_prog can set the max delay ack by using
bpf_setsockopt(TCP_BPF_DELACK_MAX).  This max delay ack can be communicated
to its peer through bpf header option.  The receiving peer can then use
this max delay ack and set a potentially lower rto by using
bpf_setsockopt(TCP_BPF_RTO_MIN) which will be introduced
in the next patch.

Another later selftest patch will also use it like the above to show
how to write and parse bpf tcp header option.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com
---
 include/net/inet_connection_sock.h | 1 +
 include/uapi/linux/bpf.h           | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index aa8893c68c50..da7264a1ebfc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,7 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
+	__u32                     icsk_delack_max;
 	__u32			  icsk_pmtu_cookie;
 	const struct tcp_congestion_ops *icsk_ca_ops;
 	const struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a1bbaff7a0af..7b905cb0213e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4257,6 +4257,7 @@ enum {
 enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
+	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From ca584ba070864c606f3a54faaafe774726d5b4a1 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:27 -0700
Subject: tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt

This patch adds bpf_setsockopt(TCP_BPF_RTO_MIN) to allow bpf prog
to set the min rto of a connection.  It could be used together
with the earlier patch which has added bpf_setsockopt(TCP_BPF_DELACK_MAX).

A later selftest patch will communicate the max delay ack in a
bpf tcp header option and then the receiving side can use
bpf_setsockopt(TCP_BPF_RTO_MIN) to set a shorter rto.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190027.2884170-1-kafai@fb.com
---
 include/net/inet_connection_sock.h | 1 +
 include/net/tcp.h                  | 2 +-
 include/uapi/linux/bpf.h           | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index da7264a1ebfc..c738abeb3265 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,7 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
+	__u32                     icsk_rto_min;
 	__u32                     icsk_delack_max;
 	__u32			  icsk_pmtu_cookie;
 	const struct tcp_congestion_ops *icsk_ca_ops;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab6c7510b5b..dda778c782fe 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -699,7 +699,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
 static inline u32 tcp_rto_min(struct sock *sk)
 {
 	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
+	u32 rto_min = inet_csk(sk)->icsk_rto_min;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
 		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7b905cb0213e..1ae20058b574 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4258,6 +4258,7 @@ enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
+	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From 7656d68455891f7fc6689f95415fd59e7a1d629b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:33 -0700
Subject: tcp: Add saw_unknown to struct tcp_options_received

In a later patch, the bpf prog only wants to be called to handle
a header option if that particular header option cannot be handled by
the kernel.  This unknown option could be written by the peer's bpf-prog.
It could also be a new standard option that the running kernel does not
support it while a bpf-prog can handle it.

This patch adds a "saw_unknown" bit to "struct tcp_options_received"
and it uses an existing one byte hole to do that.  "saw_unknown" will
be set in tcp_parse_options() if it sees an option that the kernel
cannot handle.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190033.2884430-1-kafai@fb.com
---
 include/linux/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2088d5a079af..29d166263ae7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -92,6 +92,8 @@ struct tcp_options_received {
 		smc_ok : 1,	/* SMC seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
+	u8	saw_unknown:1,	/* Received unknown option		*/
+		unused:7;
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
-- 
cgit v1.2.3


From 72be0fe6ba76282704cb84952bd5a1eb47910290 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:39 -0700
Subject: bpf: tcp: Add bpf_skops_established()

In tcp_init_transfer(), it currently calls the bpf prog to give it a
chance to handle the just "ESTABLISHED" event (e.g. do setsockopt
on the newly established sk).  Right now, it is done by calling the
general purpose tcp_call_bpf().

In the later patch, it also needs to pass the just-received skb which
concludes the 3 way handshake. E.g. the SYNACK received at the active side.
The bpf prog can then learn some specific header options written by the
peer's bpf-prog and potentially do setsockopt on the newly established sk.
Thus, instead of reusing the general purpose tcp_call_bpf(), a new function
bpf_skops_established() is added to allow passing the "skb" to the bpf
prog.  The actual skb passing from bpf_skops_established() to the bpf prog
will happen together in a later patch which has the necessary bpf pieces.

A "skb" arg is also added to tcp_init_transfer() such that
it can then be passed to bpf_skops_established().

Calling the new bpf_skops_established() instead of tcp_call_bpf()
should be a noop in this patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190039.2884750-1-kafai@fb.com
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dda778c782fe..c186dbf731e1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -394,7 +394,7 @@ void tcp_metrics_init(void);
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
 		      struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
-- 
cgit v1.2.3


From 00d211a4ea6f48e8e3b758813fe23ad28193d3bf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:46 -0700
Subject: bpf: tcp: Add bpf_skops_parse_hdr()

The patch adds a function bpf_skops_parse_hdr().
It will call the bpf prog to parse the TCP header received at
a tcp_sock that has at least reached the ESTABLISHED state.

For the packets received during the 3WHS (SYN, SYNACK and ACK),
the received skb will be available to the bpf prog during the callback
in bpf_skops_established() introduced in the previous patch and
in the bpf_skops_write_hdr_opt() that will be added in the
next patch.

Calling bpf prog to parse header is controlled by two new flags in
tp->bpf_sock_ops_cb_flags:
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG and
BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG.

When BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set,
the bpf prog will only be called when there is unknown
option in the TCP header.

When BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set,
the bpf prog will be called on all received TCP header.

This function is half implemented to highlight the changes in
TCP stack.  The actual codes preparing the bpf running context and
invoking the bpf prog will be added in the later patch with other
necessary bpf pieces.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190046.2885054-1-kafai@fb.com
---
 include/uapi/linux/bpf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1ae20058b574..010ed2abcb66 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4173,8 +4173,10 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xF,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
 };
 
 /* List of known BPF sock_ops operators.
-- 
cgit v1.2.3


From 331fca4315efa3bbd258fbdf8209d59d253c0480 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:52 -0700
Subject: bpf: tcp: Add bpf_skops_hdr_opt_len() and bpf_skops_write_hdr_opt()

The bpf prog needs to parse the SYN header to learn what options have
been sent by the peer's bpf-prog before writing its options into SYNACK.
This patch adds a "syn_skb" arg to tcp_make_synack() and send_synack().
This syn_skb will eventually be made available (as read-only) to the
bpf prog.  This will be the only SYN packet available to the bpf
prog during syncookie.  For other regular cases, the bpf prog can
also use the saved_syn.

When writing options, the bpf prog will first be called to tell the
kernel its required number of bytes.  It is done by the new
bpf_skops_hdr_opt_len().  The bpf prog will only be called when the new
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG is set in tp->bpf_sock_ops_cb_flags.
When the bpf prog returns, the kernel will know how many bytes are needed
and then update the "*remaining" arg accordingly.  4 byte alignment will
be included in the "*remaining" before this function returns.  The 4 byte
aligned number of bytes will also be stored into the opts->bpf_opt_len.
"bpf_opt_len" is a newly added member to the struct tcp_out_options.

Then the new bpf_skops_write_hdr_opt() will call the bpf prog to write the
header options.  The bpf prog is only called if it has reserved spaces
before (opts->bpf_opt_len > 0).

The bpf prog is the last one getting a chance to reserve header space
and writing the header option.

These two functions are half implemented to highlight the changes in
TCP stack.  The actual codes preparing the bpf running context and
invoking the bpf prog will be added in the later patch with other
necessary bpf pieces.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190052.2885316-1-kafai@fb.com
---
 include/net/tcp.h        | 6 ++++--
 include/uapi/linux/bpf.h | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c186dbf731e1..3e768a6b8264 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -455,7 +455,8 @@ enum tcp_synack_type {
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcp_fastopen_cookie *foc,
-				enum tcp_synack_type synack_type);
+				enum tcp_synack_type synack_type,
+				struct sk_buff *syn_skb);
 int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -2035,7 +2036,8 @@ struct tcp_request_sock_ops {
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
-			   enum tcp_synack_type synack_type);
+			   enum tcp_synack_type synack_type,
+			   struct sk_buff *syn_skb);
 };
 
 extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 010ed2abcb66..18d0e128bc3c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4175,8 +4175,9 @@ enum {
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
 	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
 };
 
 /* List of known BPF sock_ops operators.
-- 
cgit v1.2.3


From c9985d09e18965131958102f4b67fa1e742df335 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:58 -0700
Subject: bpf: sock_ops: Change some members of sock_ops_kern from u32 to u8

A later patch needs to add a few pointers and a few u8 to
sock_ops_kern.  Hence, this patch saves some spaces by moving
some of the existing members from u32 to u8 so that the later
patch can still fit everything in a cacheline.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200820190058.2885640-1-kafai@fb.com
---
 include/linux/filter.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0a355b005bf4..c427dfa5f908 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1236,13 +1236,13 @@ struct bpf_sock_addr_kern {
 
 struct bpf_sock_ops_kern {
 	struct	sock *sk;
-	u32	op;
 	union {
 		u32 args[4];
 		u32 reply;
 		u32 replylong[4];
 	};
-	u32	is_fullsock;
+	u8	op;
+	u8	is_fullsock;
 	u64	temp;			/* temp and everything after is not
 					 * initialized to 0 before calling
 					 * the BPF program. New fields that
-- 
cgit v1.2.3


From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:01:04 -0700
Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Note: The TCP changes here is mainly to implement the bpf
  pieces into the bpf_skops_*() functions introduced
  in the earlier patches. ]

The earlier effort in BPF-TCP-CC allows the TCP Congestion Control
algorithm to be written in BPF.  It opens up opportunities to allow
a faster turnaround time in testing/releasing new congestion control
ideas to production environment.

The same flexibility can be extended to writing TCP header option.
It is not uncommon that people want to test new TCP header option
to improve the TCP performance.  Another use case is for data-center
that has a more controlled environment and has more flexibility in
putting header options for internal only use.

For example, we want to test the idea in putting maximum delay
ACK in TCP header option which is similar to a draft RFC proposal [1].

This patch introduces the necessary BPF API and use them in the
TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse
and write TCP header options.  It currently supports most of
the TCP packet except RST.

Supported TCP header option:
───────────────────────────
This patch allows the bpf-prog to write any option kind.
Different bpf-progs can write its own option by calling the new helper
bpf_store_hdr_opt().  The helper will ensure there is no duplicated
option in the header.

By allowing bpf-prog to write any option kind, this gives a lot of
flexibility to the bpf-prog.  Different bpf-prog can write its
own option kind.  It could also allow the bpf-prog to support a
recently standardized option on an older kernel.

Sockops Callback Flags:
──────────────────────
The bpf program will only be called to parse/write tcp header option
if the following newly added callback flags are enabled
in tp->bpf_sock_ops_cb_flags:
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG
BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG

A few words on the PARSE CB flags.  When the above PARSE CB flags are
turned on, the bpf-prog will be called on packets received
at a sk that has at least reached the ESTABLISHED state.
The parsing of the SYN-SYNACK-ACK will be discussed in the
"3 Way HandShake" section.

The default is off for all of the above new CB flags, i.e. the bpf prog
will not be called to parse or write bpf hdr option.  There are
details comment on these new cb flags in the UAPI bpf.h.

sock_ops->skb_data and bpf_load_hdr_opt()
─────────────────────────────────────────
sock_ops->skb_data and sock_ops->skb_data_end covers the whole
TCP header and its options.  They are read only.

The new bpf_load_hdr_opt() helps to read a particular option "kind"
from the skb_data.

Please refer to the comment in UAPI bpf.h.  It has details
on what skb_data contains under different sock_ops->op.

3 Way HandShake
───────────────
The bpf-prog can learn if it is sending SYN or SYNACK by reading the
sock_ops->skb_tcp_flags.

* Passive side

When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB),
the received SYN skb will be available to the bpf prog.  The bpf prog can
use the SYN skb (which may carry the header option sent from the remote bpf
prog) to decide what bpf header option should be written to the outgoing
SYNACK skb.  The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*).
More on this later.  Also, the bpf prog can learn if it is in syncookie
mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE).

The bpf prog can store the received SYN pkt by using the existing
bpf_setsockopt(TCP_SAVE_SYN).  The example in a later patch does it.
[ Note that the fullsock here is a listen sk, bpf_sk_storage
  is not very useful here since the listen sk will be shared
  by many concurrent connection requests.

  Extending bpf_sk_storage support to request_sock will add weight
  to the minisock and it is not necessary better than storing the
  whole ~100 bytes SYN pkt. ]

When the connection is established, the bpf prog will be called
in the existing PASSIVE_ESTABLISHED_CB callback.  At that time,
the bpf prog can get the header option from the saved syn and
then apply the needed operation to the newly established socket.
The later patch will use the max delay ack specified in the SYN
header and set the RTO of this newly established connection
as an example.

The received ACK (that concludes the 3WHS) will also be available to
the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data.
It could be useful in syncookie scenario.  More on this later.

There is an existing getsockopt "TCP_SAVED_SYN" to return the whole
saved syn pkt which includes the IP[46] header and the TCP header.
A few "TCP_BPF_SYN*" getsockopt has been added to allow specifying where to
start getting from, e.g. starting from TCP header, or from IP[46] header.

The new getsockopt(TCP_BPF_SYN*) will also know where it can get
the SYN's packet from:
  - (a) the just received syn (available when the bpf prog is writing SYNACK)
        and it is the only way to get SYN during syncookie mode.
  or
  - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other
        existing CB).

The bpf prog does not need to know where the SYN pkt is coming from.
The getsockopt(TCP_BPF_SYN*) will hide this details.

Similarly, a flags "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to
bpf_load_hdr_opt() to read a particular header option from the SYN packet.

* Fastopen

Fastopen should work the same as the regular non fastopen case.
This is a test in a later patch.

* Syncookie

For syncookie, the later example patch asks the active
side's bpf prog to resend the header options in ACK.  The server
can use bpf_load_hdr_opt() to look at the options in this
received ACK during PASSIVE_ESTABLISHED_CB.

* Active side

The bpf prog will get a chance to write the bpf header option
in the SYN packet during WRITE_HDR_OPT_CB.  The received SYNACK
pkt will also be available to the bpf prog during the existing
ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data
and bpf_load_hdr_opt().

* Turn off header CB flags after 3WHS

If the bpf prog does not need to write/parse header options
beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags
to avoid being called for header options.
Or the bpf-prog can select to leave the UNKNOWN_HDR_OPT_CB_FLAG on
so that the kernel will only call it when there is option that
the kernel cannot handle.

[1]: draft-wang-tcpm-low-latency-opt-00
     https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com
---
 include/linux/bpf-cgroup.h |  25 ++++
 include/linux/filter.h     |   4 +
 include/net/tcp.h          |  49 ++++++++
 include/uapi/linux/bpf.h   | 300 ++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 377 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 64f367044e25..2f98d2fce62e 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -279,6 +279,31 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr)			\
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
 
+/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
+ * fullsock and its parent fullsock cannot be traced by
+ * sk_to_full_sk().
+ *
+ * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
+ * Its listener-sk is not attached to the rsk_listener.
+ * In this case, the caller holds the listener-sk (unlocked),
+ * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with
+ * the listener-sk such that the cgroup-bpf-progs of the
+ * listener-sk will be run.
+ *
+ * Regardless of syncookie mode or not,
+ * calling bpf_setsockopt on listener-sk will not make sense anyway,
+ * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
+ */
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk)			\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled)						\
+		__ret = __cgroup_bpf_run_filter_sock_ops(sk,		\
+							 sock_ops,	\
+							 BPF_CGROUP_SOCK_OPS); \
+	__ret;								\
+})
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
 ({									       \
 	int __ret = 0;							       \
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c427dfa5f908..995625950cc1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1241,8 +1241,12 @@ struct bpf_sock_ops_kern {
 		u32 reply;
 		u32 replylong[4];
 	};
+	struct sk_buff	*syn_skb;
+	struct sk_buff	*skb;
+	void	*skb_data_end;
 	u8	op;
 	u8	is_fullsock;
+	u8	remaining_opt_len;
 	u64	temp;			/* temp and everything after is not
 					 * initialized to 0 before calling
 					 * the BPF program. New fields that
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3e768a6b8264..1f967b4e22f6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2235,6 +2235,55 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		      struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
 
+#ifdef CONFIG_CGROUP_BPF
+/* Copy the listen sk's HDR_OPT_CB flags to its child.
+ *
+ * During 3-Way-HandShake, the synack is usually sent from
+ * the listen sk with the HDR_OPT_CB flags set so that
+ * bpf-prog will be called to write the BPF hdr option.
+ *
+ * In fastopen, the child sk is used to send synack instead
+ * of the listen sk.  Thus, inheriting the HDR_OPT_CB flags
+ * from the listen sk gives the bpf-prog a chance to write
+ * BPF hdr option in the synack pkt during fastopen.
+ *
+ * Both fastopen and non-fastopen child will inherit the
+ * HDR_OPT_CB flags to keep the bpf-prog having a consistent
+ * behavior when deciding to clear this cb flags (or not)
+ * during the PASSIVE_ESTABLISHED_CB.
+ *
+ * In the future, other cb flags could be inherited here also.
+ */
+static inline void bpf_skops_init_child(const struct sock *sk,
+					struct sock *child)
+{
+	tcp_sk(child)->bpf_sock_ops_cb_flags =
+		tcp_sk(sk)->bpf_sock_ops_cb_flags &
+		(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
+		 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+		 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+	skops->skb = skb;
+	skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_child(const struct sock *sk,
+					struct sock *child)
+{
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+}
+#endif
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18d0e128bc3c..f67ec5d9e57d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3395,6 +3395,120 @@ union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ *	Description
+ *		Load header option.  Support reading a particular TCP header
+ *		option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *
+ *		If *flags* is 0, it will search the option from the
+ *		sock_ops->skb_data.  The comment in "struct bpf_sock_ops"
+ *		has details on what skb_data contains under different
+ *		sock_ops->op.
+ *
+ *		The first byte of the *searchby_res* specifies the
+ *		kind that it wants to search.
+ *
+ *		If the searching kind is an experimental kind
+ *		(i.e. 253 or 254 according to RFC6994).  It also
+ *		needs to specify the "magic" which is either
+ *		2 bytes or 4 bytes.  It then also needs to
+ *		specify the size of the magic by using
+ *		the 2nd byte which is "kind-length" of a TCP
+ *		header option and the "kind-length" also
+ *		includes the first 2 bytes "kind" and "kind-length"
+ *		itself as a normal TCP header option also does.
+ *
+ *		For example, to search experimental kind 254 with
+ *		2 byte magic 0xeB9F, the searchby_res should be
+ *		[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ *		To search for the standard window scale option (3),
+ *		the searchby_res should be [ 3, 0, 0, .... 0 ].
+ *		Note, kind-length must be 0 for regular option.
+ *
+ *		Searching for No-Op (0) and End-of-Option-List (1) are
+ *		not supported.
+ *
+ *		*len* must be at least 2 bytes which is the minimal size
+ *		of a header option.
+ *
+ *		Supported flags:
+ *		* **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ *		  saved_syn packet or the just-received syn packet.
+ *
+ *	Return
+ *		>0 when found, the header option is copied to *searchby_res*.
+ *		The return value is the total length copied.
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOMSG** The option is not found
+ *
+ *		**-ENOENT** No syn packet available when
+ *			    **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *
+ *		**-ENOSPC** Not enough space.  Only *len* number of
+ *			    bytes are copied.
+ *
+ *		**-EFAULT** Cannot parse the header options in the packet
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ *	Description
+ *		Store header option.  The data will be copied
+ *		from buffer *from* with length *len* to the TCP header.
+ *
+ *		The buffer *from* should have the whole option that
+ *		includes the kind, kind-length, and the actual
+ *		option data.  The *len* must be at least kind-length
+ *		long.  The kind-length does not have to be 4 byte
+ *		aligned.  The kernel will take care of the padding
+ *		and setting the 4 bytes aligned value to th->doff.
+ *
+ *		This helper will check for duplicated option
+ *		by searching the same option in the outgoing skb.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *			    Nothing has been written
+ *
+ *		**-EEXIST** The option has already existed
+ *
+ *		**-EFAULT** Cannot parse the existing header options
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ *	Description
+ *		Reserve *len* bytes for the bpf header option.  The
+ *		space will be used by bpf_store_hdr_opt() later in
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *		If bpf_reserve_hdr_opt() is called multiple times,
+ *		the total number of bytes will be reserved.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** if param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3539,6 +3653,9 @@ union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(load_hdr_opt),		\
+	FN(store_hdr_opt),		\
+	FN(reserve_hdr_opt),
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4165,6 +4282,36 @@ struct bpf_sock_ops {
 	__u64 bytes_received;
 	__u64 bytes_acked;
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	/* [skb_data, skb_data_end) covers the whole TCP header.
+	 *
+	 * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+	 * BPF_SOCK_OPS_HDR_OPT_LEN_CB:   Not useful because the
+	 *                                header has not been written.
+	 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+	 *				  been written so far.
+	 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:  The SYNACK that concludes
+	 *					the 3WHS.
+	 * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+	 *					the 3WHS.
+	 *
+	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 */
+	__bpf_md_ptr(void *, skb_data);
+	__bpf_md_ptr(void *, skb_data_end);
+	__u32 skb_len;		/* The total length of a packet.
+				 * It includes the header, options,
+				 * and payload.
+				 */
+	__u32 skb_tcp_flags;	/* tcp_flags of the header.  It provides
+				 * an easy way to check for tcp_flags
+				 * without parsing skb_data.
+				 *
+				 * In particular, the skb_tcp_flags
+				 * will still be available in
+				 * BPF_SOCK_OPS_HDR_OPT_LEN even though
+				 * the outgoing header has not
+				 * been written yet.
+				 */
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
@@ -4173,8 +4320,48 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
-	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	/* Call bpf for all received TCP headers.  The bpf prog will be
+	 * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 *
+	 * It could be used at the client/active side (i.e. connect() side)
+	 * when the server told it that the server was in syncookie
+	 * mode and required the active side to resend the bpf-written
+	 * options.  The active side can keep writing the bpf-options until
+	 * it received a valid packet from the server side to confirm
+	 * the earlier packet (and options) has been received.  The later
+	 * example patch is using it like this at the active side when the
+	 * server is in syncookie mode.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG	= (1<<4),
+	/* Call bpf when kernel has received a header option that
+	 * the kernel cannot handle.  The bpf prog will be called under
+	 * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 */
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	/* Call bpf when the kernel is writing header options for the
+	 * outgoing packet.  The bpf prog will first be called
+	 * to reserve space in a skb under
+	 * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB.  Then
+	 * the bpf prog will be called to write the header option(s)
+	 * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+	 * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+	 * related helpers that will be useful to the bpf programs.
+	 *
+	 * The kernel gets its chance to reserve space and write
+	 * options first before the BPF program does.
+	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
 	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
@@ -4233,6 +4420,63 @@ enum {
 					 */
 	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
 					 */
+	BPF_SOCK_OPS_PARSE_HDR_OPT_CB,	/* Parse the header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the TCP header only.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option.
+					 */
+	BPF_SOCK_OPS_HDR_OPT_LEN_CB,	/* Reserve space for writing the
+					 * header option later in
+					 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+					 * Arg1: bool want_cookie. (in
+					 *       writing SYNACK only)
+					 *
+					 * sock_ops->skb_data:
+					 * Not available because no header has
+					 * been	written yet.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the
+					 * outgoing skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_reserve_hdr_opt() should
+					 * be used to reserve space.
+					 */
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB,	/* Write the header options
+					 * Arg1: bool want_cookie. (in
+					 *       writing SYNACK only)
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the outgoing skb.
+					 * It covers the TCP header
+					 * that has already been written
+					 * by the kernel and the
+					 * earlier bpf-progs.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the outgoing
+					 * skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_store_hdr_opt() should
+					 * be used to write the
+					 * option.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option that
+					 * has already been written
+					 * by the kernel or the
+					 * earlier bpf-progs.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4262,6 +4506,60 @@ enum {
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
+	/* Copy the SYN pkt to optval
+	 *
+	 * BPF_PROG_TYPE_SOCK_OPS only.  It is similar to the
+	 * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+	 * to only getting from the saved_syn.  It can either get the
+	 * syn packet from:
+	 *
+	 * 1. the just-received SYN packet (only available when writing the
+	 *    SYNACK).  It will be useful when it is not necessary to
+	 *    save the SYN packet for latter use.  It is also the only way
+	 *    to get the SYN during syncookie mode because the syn
+	 *    packet cannot be saved during syncookie.
+	 *
+	 * OR
+	 *
+	 * 2. the earlier saved syn which was done by
+	 *    bpf_setsockopt(TCP_SAVE_SYN).
+	 *
+	 * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+	 * SYN packet is obtained.
+	 *
+	 * If the bpf-prog does not need the IP[46] header,  the
+	 * bpf-prog can avoid parsing the IP header by using
+	 * TCP_BPF_SYN.  Otherwise, the bpf-prog can get both
+	 * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+	 *
+	 *      >0: Total number of bytes copied
+	 * -ENOSPC: Not enough space in optval. Only optlen number of
+	 *          bytes is copied.
+	 * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+	 *	    is not saved by setsockopt(TCP_SAVE_SYN).
+	 */
+	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
+	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+};
+
+enum {
+	BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+	BPF_WRITE_HDR_TCP_CURRENT_MSS = 1,	/* Kernel is finding the
+						 * total option spaces
+						 * required for an established
+						 * sk in order to calculate the
+						 * MSS.  No skb is actually
+						 * sent.
+						 */
+	BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2,	/* Kernel is in syncookie mode
+						 * when sending a SYN.
+						 */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:01:23 -0700
Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN

This patch is adapted from Eric's patch in an earlier discussion [1].

The TCP_SAVE_SYN currently only stores the network header and
tcp header.  This patch allows it to optionally store
the mac header also if the setsockopt's optval is 2.

It requires one more bit for the "save_syn" bit field in tcp_sock.
This patch achieves this by moving the syn_smc bit next to the is_mptcp.
The syn_smc is currently used with the TCP experimental option.  Since
syn_smc is only used when CONFIG_SMC is enabled, this patch also puts
the "IS_ENABLED(CONFIG_SMC)" around it like the is_mptcp did
with "IS_ENABLED(CONFIG_MPTCP)".

The mac_hdrlen is also stored in the "struct saved_syn"
to allow a quick offset from the bpf prog if it chooses to start
getting from the network header or the tcp header.

[1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com
---
 include/linux/tcp.h        | 13 ++++++++-----
 include/net/request_sock.h |  1 +
 include/uapi/linux/bpf.h   |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 29d166263ae7..56ff2952edaf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,14 +239,13 @@ struct tcp_sock {
 		repair      : 1,
 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-	u8	syn_data:1,	/* SYN includes data */
+	u8	save_syn:2,	/* Save headers of SYN packet */
+		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
-		save_syn:1,	/* Save headers of SYN packet */
-		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
-		syn_smc:1;	/* SYN includes SMC */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */
 
 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */
@@ -393,6 +392,9 @@ struct tcp_sock {
 #if IS_ENABLED(CONFIG_MPTCP)
 	bool	is_mptcp;
 #endif
+#if IS_ENABLED(CONFIG_SMC)
+	bool	syn_smc;	/* SYN includes SMC */
+#endif
 
 #ifdef CONFIG_TCP_MD5SIG
 /* TCP AF-Specific parts; only used by MD5 Signature support so far */
@@ -488,7 +490,8 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 
 static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
 {
-	return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen;
+	return saved_syn->mac_hdrlen + saved_syn->network_hdrlen +
+		saved_syn->tcp_hdrlen;
 }
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 7d9ed99a77bd..29e41ff3ec93 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -42,6 +42,7 @@ struct request_sock_ops {
 int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
 
 struct saved_syn {
+	u32 mac_hdrlen;
 	u32 network_hdrlen;
 	u32 tcp_hdrlen;
 	u8 data[];
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f67ec5d9e57d..544b89a64918 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4540,6 +4540,7 @@ enum {
 	 */
 	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
 	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+	TCP_BPF_SYN_MAC         = 1007, /* Copy the MAC, IP[46], and TCP header */
 };
 
 enum {
-- 
cgit v1.2.3


From 583bbf0624dfd8fc45f1049be1d4980be59451ff Mon Sep 17 00:00:00 2001
From: Luke Hsiao <lukehsiao@google.com>
Date: Fri, 21 Aug 2020 21:41:04 -0700
Subject: io_uring: allow tcp ancillary data for __sys_recvmsg_sock()

For TCP tx zero-copy, the kernel notifies the process of completions by
queuing completion notifications on the socket error queue. This patch
allows reading these notifications via recvmsg to support TCP tx
zero-copy.

Ancillary data was originally disallowed due to privilege escalation
via io_uring's offloading of sendmsg() onto a kernel thread with kernel
credentials (https://crbug.com/project-zero/1975). So, we must ensure
that the socket type is one where the ancillary data types that are
delivered on recvmsg are plain data (no file descriptors or values that
are translated based on the identity of the calling process).

This was tested by using io_uring to call recvmsg on the MSG_ERRQUEUE
with tx zero-copy enabled. Before this patch, we received -EINVALID from
this specific code path. After this patch, we could read tcp tx
zero-copy completion notifications from the MSG_ERRQUEUE.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Arjun Roy <arjunroy@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Luke Hsiao <lukehsiao@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..7657c6432a69 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,8 @@ struct net;
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
 
+#define PROTO_CMSG_DATA_ONLY	0x0001
+
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
  * enum sock_type - Socket types
@@ -135,6 +137,7 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 
 struct proto_ops {
 	int		family;
+	unsigned int	flags;
 	struct module	*owner;
 	int		(*release)   (struct socket *sock);
 	int		(*bind)	     (struct socket *sock,
-- 
cgit v1.2.3


From 755f982bb1ff469a181df3eaf8dd5d769267ab8e Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Sun, 23 Aug 2020 14:19:26 +0300
Subject: qed/qede: make devlink survive recovery

Devlink instance lifecycle was linked to qed_dev object,
that caused devlink to be recreated on each recovery.

Changing it by making higher level driver (qede) responsible for its
life. This way devlink now survives recoveries.

qede now stores devlink structure pointer as a part of its device
object, devlink private data contains a linkage structure,
qed_devlink.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Alexander Lobakin <alobakin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index cd6a5c7e56eb..d8368e1770df 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -21,6 +21,7 @@
 #include <linux/qed/common_hsi.h>
 #include <linux/qed/qed_chain.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <net/devlink.h>
 
 enum dcbx_protocol_type {
 	DCBX_PROTOCOL_ISCSI,
@@ -779,6 +780,10 @@ enum qed_nvm_flash_cmd {
 	QED_NVM_FLASH_CMD_NVM_MAX,
 };
 
+struct qed_devlink {
+	struct qed_dev *cdev;
+};
+
 struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void (*link_update)(void *dev, struct qed_link_output *link);
@@ -1137,6 +1142,10 @@ struct qed_common_ops {
  *
  */
 	int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val);
+
+	struct devlink* (*devlink_register)(struct qed_dev *cdev);
+
+	void (*devlink_unregister)(struct devlink *devlink);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 9524067b9a91dce2a096a0de7727c217495e3d2e Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Sun, 23 Aug 2020 14:19:29 +0300
Subject: qed: health reporter init deinit seq

Here we declare health reporter ops (empty for now)
and register these in qed probe and remove callbacks.

This way we get devlink attached to all kind of qed* PCI
device entities: networking or storage offload entity.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Alexander Lobakin <alobakin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index d8368e1770df..30fe06fe06a0 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -782,6 +782,7 @@ enum qed_nvm_flash_cmd {
 
 struct qed_devlink {
 	struct qed_dev *cdev;
+	struct devlink_health_reporter *fw_reporter;
 };
 
 struct qed_common_cb_ops {
-- 
cgit v1.2.3


From 4f5a8db27eb926616500f827d9b81dc40595b0ef Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Sun, 23 Aug 2020 14:19:30 +0300
Subject: qed: use devlink logic to report errors

Use devlink_health_report to push error indications.
We implement this in qede via callback function to make it possible
to reuse the same for other drivers sitting on top of qed in future.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Alexander Lobakin <alobakin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 30fe06fe06a0..a75533de9186 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -906,6 +906,9 @@ struct qed_common_ops {
 
 	int (*dbg_all_data_size) (struct qed_dev *cdev);
 
+	int (*report_fatal_error)(struct devlink *devlink,
+				  enum qed_hw_err_type err_type);
+
 /**
  * @brief can_link_change - can the instance change the link or not
  *
-- 
cgit v1.2.3


From c5c642c55e2fd43fcf42262fd1e87271d413fb42 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Sun, 23 Aug 2020 14:19:33 +0300
Subject: qed: align adjacent indent

Remove extra indent on some of adjacent declarations.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Alexander Lobakin <alobakin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 69 ++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index a75533de9186..56fa55841d39 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -850,10 +850,9 @@ struct qed_common_ops {
 	struct qed_dev*	(*probe)(struct pci_dev *dev,
 				 struct qed_probe_params *params);
 
-	void		(*remove)(struct qed_dev *cdev);
+	void (*remove)(struct qed_dev *cdev);
 
-	int		(*set_power_state)(struct qed_dev *cdev,
-					   pci_power_t state);
+	int (*set_power_state)(struct qed_dev *cdev, pci_power_t state);
 
 	void (*set_name) (struct qed_dev *cdev, char name[]);
 
@@ -861,50 +860,48 @@ struct qed_common_ops {
 	 * PF params required for the call before slowpath_start is
 	 * documented within the qed_pf_params structure definition.
 	 */
-	void		(*update_pf_params)(struct qed_dev *cdev,
-					    struct qed_pf_params *params);
-	int		(*slowpath_start)(struct qed_dev *cdev,
-					  struct qed_slowpath_params *params);
+	void (*update_pf_params)(struct qed_dev *cdev,
+				 struct qed_pf_params *params);
 
-	int		(*slowpath_stop)(struct qed_dev *cdev);
+	int (*slowpath_start)(struct qed_dev *cdev,
+			      struct qed_slowpath_params *params);
+
+	int (*slowpath_stop)(struct qed_dev *cdev);
 
 	/* Requests to use `cnt' interrupts for fastpath.
 	 * upon success, returns number of interrupts allocated for fastpath.
 	 */
-	int		(*set_fp_int)(struct qed_dev *cdev,
-				      u16 cnt);
+	int (*set_fp_int)(struct qed_dev *cdev, u16 cnt);
 
 	/* Fills `info' with pointers required for utilizing interrupts */
-	int		(*get_fp_int)(struct qed_dev *cdev,
-				      struct qed_int_info *info);
-
-	u32		(*sb_init)(struct qed_dev *cdev,
-				   struct qed_sb_info *sb_info,
-				   void *sb_virt_addr,
-				   dma_addr_t sb_phy_addr,
-				   u16 sb_id,
-				   enum qed_sb_type type);
-
-	u32		(*sb_release)(struct qed_dev *cdev,
-				      struct qed_sb_info *sb_info,
-				      u16 sb_id,
-				      enum qed_sb_type type);
-
-	void		(*simd_handler_config)(struct qed_dev *cdev,
-					       void *token,
-					       int index,
-					       void (*handler)(void *));
-
-	void		(*simd_handler_clean)(struct qed_dev *cdev,
-					      int index);
-	int (*dbg_grc)(struct qed_dev *cdev,
-		       void *buffer, u32 *num_dumped_bytes);
+	int (*get_fp_int)(struct qed_dev *cdev, struct qed_int_info *info);
+
+	u32 (*sb_init)(struct qed_dev *cdev,
+		       struct qed_sb_info *sb_info,
+		       void *sb_virt_addr,
+		       dma_addr_t sb_phy_addr,
+		       u16 sb_id,
+		       enum qed_sb_type type);
+
+	u32 (*sb_release)(struct qed_dev *cdev,
+			  struct qed_sb_info *sb_info,
+			  u16 sb_id,
+			  enum qed_sb_type type);
+
+	void (*simd_handler_config)(struct qed_dev *cdev,
+				    void *token,
+				    int index,
+				    void (*handler)(void *));
+
+	void (*simd_handler_clean)(struct qed_dev *cdev, int index);
+
+	int (*dbg_grc)(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes);
 
 	int (*dbg_grc_size)(struct qed_dev *cdev);
 
-	int (*dbg_all_data) (struct qed_dev *cdev, void *buffer);
+	int (*dbg_all_data)(struct qed_dev *cdev, void *buffer);
 
-	int (*dbg_all_data_size) (struct qed_dev *cdev);
+	int (*dbg_all_data_size)(struct qed_dev *cdev);
 
 	int (*report_fatal_error)(struct devlink *devlink,
 				  enum qed_hw_err_type err_type);
-- 
cgit v1.2.3


From 1f00d375af84fbcdb6dd6c79fd7c3d02d2390338 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:13 +0200
Subject: bpf: Renames in preparation for bpf_local_storage

A purely mechanical change to split the renaming from the actual
generalization.

Flags/consts:

  SK_STORAGE_CREATE_FLAG_MASK	BPF_LOCAL_STORAGE_CREATE_FLAG_MASK
  BPF_SK_STORAGE_CACHE_SIZE	BPF_LOCAL_STORAGE_CACHE_SIZE
  MAX_VALUE_SIZE		BPF_LOCAL_STORAGE_MAX_VALUE_SIZE

Structs:

  bucket			bpf_local_storage_map_bucket
  bpf_sk_storage_map		bpf_local_storage_map
  bpf_sk_storage_data		bpf_local_storage_data
  bpf_sk_storage_elem		bpf_local_storage_elem
  bpf_sk_storage		bpf_local_storage

The "sk" member in bpf_local_storage is also updated to "owner"
in preparation for changing the type to void * in a subsequent patch.

Functions:

  selem_linked_to_sk			selem_linked_to_storage
  selem_alloc				bpf_selem_alloc
  __selem_unlink_sk			bpf_selem_unlink_storage_nolock
  __selem_link_sk			bpf_selem_link_storage_nolock
  selem_unlink_sk			__bpf_selem_unlink_storage
  sk_storage_update			bpf_local_storage_update
  __sk_storage_lookup			bpf_local_storage_lookup
  bpf_sk_storage_map_free		bpf_local_storage_map_free
  bpf_sk_storage_map_alloc		bpf_local_storage_map_alloc
  bpf_sk_storage_map_alloc_check	bpf_local_storage_map_alloc_check
  bpf_sk_storage_map_check_btf		bpf_local_storage_map_check_btf

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-2-kpsingh@chromium.org
---
 include/net/sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 064637d1ddf6..18423cc9cde8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -246,7 +246,7 @@ struct sock_common {
 	/* public: */
 };
 
-struct bpf_sk_storage;
+struct bpf_local_storage;
 
 /**
   *	struct sock - network layer representation of sockets
@@ -517,7 +517,7 @@ struct sock {
 	void                    (*sk_destruct)(struct sock *sk);
 	struct sock_reuseport __rcu	*sk_reuseport_cb;
 #ifdef CONFIG_BPF_SYSCALL
-	struct bpf_sk_storage __rcu	*sk_bpf_storage;
+	struct bpf_local_storage __rcu	*sk_bpf_storage;
 #endif
 	struct rcu_head		sk_rcu;
 };
-- 
cgit v1.2.3


From 4cc9ce4e739961a7b9e6b2f3b27a72124d356373 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:14 +0200
Subject: bpf: Generalize caching for sk_storage.

Provide the a ability to define local storage caches on a per-object
type basis. The caches and caching indices for different objects should
not be inter-mixed as suggested in:

  https://lore.kernel.org/bpf/20200630193441.kdwnkestulg5erii@kafai-mbp.dhcp.thefacebook.com/

  "Caching a sk-storage at idx=0 of a sk should not stop an
  inode-storage to be cached at the same idx of a inode."

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-3-kpsingh@chromium.org
---
 include/net/bpf_sk_storage.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 5036c94c0503..950c5aaba15e 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,6 +3,9 @@
 #ifndef _BPF_SK_STORAGE_H
 #define _BPF_SK_STORAGE_H
 
+#include <linux/types.h>
+#include <linux/spinlock.h>
+
 struct sock;
 
 void bpf_sk_storage_free(struct sock *sk);
@@ -15,6 +18,22 @@ struct sk_buff;
 struct nlattr;
 struct sock;
 
+#define BPF_LOCAL_STORAGE_CACHE_SIZE	16
+
+struct bpf_local_storage_cache {
+	spinlock_t idx_lock;
+	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+};
+
+#define DEFINE_BPF_STORAGE_CACHE(name)				\
+static struct bpf_local_storage_cache name = {			\
+	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
+}
+
+u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+				      u16 idx);
+
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
 struct bpf_sk_storage_diag *
-- 
cgit v1.2.3


From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:15 +0200
Subject: bpf: Generalize bpf_sk_storage

Refactor the functionality in bpf_sk_storage.c so that concept of
storage linked to kernel objects can be extended to other objects like
inode, task_struct etc.

Each new local storage will still be a separate map and provide its own
set of helpers. This allows for future object specific extensions and
still share a lot of the underlying implementation.

This includes the changes suggested by Martin in:

  https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/

adding new map operations to support bpf_local_storage maps:

* storages for different kernel objects to optionally have different
  memory charging strategy (map_local_storage_charge,
  map_local_storage_uncharge)
* Functionality to extract the storage pointer from a pointer to the
  owning object (map_owner_storage_ptr)

Co-developed-by: Martin KaFai Lau <kafai@fb.com>

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org
---
 include/linux/bpf.h          |  8 +++++++
 include/net/bpf_sk_storage.h | 52 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h     |  8 +++++--
 3 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 81f38e2fda78..8c443b93ac11 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -34,6 +34,8 @@ struct btf_type;
 struct exception_table_entry;
 struct seq_operations;
 struct bpf_iter_aux_info;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -104,6 +106,12 @@ struct bpf_map_ops {
 	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
 			     struct poll_table_struct *pts);
 
+	/* Functions called by bpf_local_storage maps */
+	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+					void *owner, u32 size);
+	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+					   void *owner, u32 size);
+	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 950c5aaba15e..9e631b5466e3 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,8 +3,15 @@
 #ifndef _BPF_SK_STORAGE_H
 #define _BPF_SK_STORAGE_H
 
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
 
 struct sock;
 
@@ -13,6 +20,7 @@ void bpf_sk_storage_free(struct sock *sk);
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
 
+struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
 struct sk_buff;
 struct nlattr;
@@ -34,6 +42,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 				      u16 idx);
 
+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+			 struct bpf_local_storage_map *smap,
+			 bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type);
+
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+				   struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+				     struct bpf_local_storage_elem *selem,
+				     bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+		bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+			struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+			 void *value, u64 map_flags);
+
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
 struct bpf_sk_storage_diag *
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 544b89a64918..2cbd137eed86 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3765,9 +3765,13 @@ enum {
 	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
 };
 
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
 enum {
-	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+	 */
+	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
 };
 
 /* BPF_FUNC_read_branch_records flags. */
-- 
cgit v1.2.3


From 450af8d0f6be2e7dd2a528a3fb054bb726bf1747 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:16 +0200
Subject: bpf: Split bpf_local_storage to bpf_sk_storage

A purely mechanical change:

	bpf_sk_storage.c = bpf_sk_storage.c + bpf_local_storage.c
	bpf_sk_storage.h = bpf_sk_storage.h + bpf_local_storage.h

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-5-kpsingh@chromium.org
---
 include/linux/bpf_local_storage.h | 163 ++++++++++++++++++++++++++++++++++++++
 include/net/bpf_sk_storage.h      |  61 +-------------
 2 files changed, 164 insertions(+), 60 deletions(-)
 create mode 100644 include/linux/bpf_local_storage.h

(limited to 'include')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
new file mode 100644
index 000000000000..b2c9463f36a1
--- /dev/null
+++ b/include/linux/bpf_local_storage.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#ifndef _BPF_LOCAL_STORAGE_H
+#define _BPF_LOCAL_STORAGE_H
+
+#include <linux/bpf.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <uapi/linux/btf.h>
+
+#define BPF_LOCAL_STORAGE_CACHE_SIZE	16
+
+struct bpf_local_storage_map_bucket {
+	struct hlist_head list;
+	raw_spinlock_t lock;
+};
+
+/* Thp map is not the primary owner of a bpf_local_storage_elem.
+ * Instead, the container object (eg. sk->sk_bpf_storage) is.
+ *
+ * The map (bpf_local_storage_map) is for two purposes
+ * 1. Define the size of the "local storage".  It is
+ *    the map's value_size.
+ *
+ * 2. Maintain a list to keep track of all elems such
+ *    that they can be cleaned up during the map destruction.
+ *
+ * When a bpf local storage is being looked up for a
+ * particular object,  the "bpf_map" pointer is actually used
+ * as the "key" to search in the list of elem in
+ * the respective bpf_local_storage owned by the object.
+ *
+ * e.g. sk->sk_bpf_storage is the mini-map with the "bpf_map" pointer
+ * as the searching key.
+ */
+struct bpf_local_storage_map {
+	struct bpf_map map;
+	/* Lookup elem does not require accessing the map.
+	 *
+	 * Updating/Deleting requires a bucket lock to
+	 * link/unlink the elem from the map.  Having
+	 * multiple buckets to improve contention.
+	 */
+	struct bpf_local_storage_map_bucket *buckets;
+	u32 bucket_log;
+	u16 elem_size;
+	u16 cache_idx;
+};
+
+struct bpf_local_storage_data {
+	/* smap is used as the searching key when looking up
+	 * from the object's bpf_local_storage.
+	 *
+	 * Put it in the same cacheline as the data to minimize
+	 * the number of cachelines access during the cache hit case.
+	 */
+	struct bpf_local_storage_map __rcu *smap;
+	u8 data[] __aligned(8);
+};
+
+/* Linked to bpf_local_storage and bpf_local_storage_map */
+struct bpf_local_storage_elem {
+	struct hlist_node map_node;	/* Linked to bpf_local_storage_map */
+	struct hlist_node snode;	/* Linked to bpf_local_storage */
+	struct bpf_local_storage __rcu *local_storage;
+	struct rcu_head rcu;
+	/* 8 bytes hole */
+	/* The data is stored in aother cacheline to minimize
+	 * the number of cachelines access during a cache hit.
+	 */
+	struct bpf_local_storage_data sdata ____cacheline_aligned;
+};
+
+struct bpf_local_storage {
+	struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
+	struct hlist_head list; /* List of bpf_local_storage_elem */
+	void *owner;		/* The object that owns the above "list" of
+				 * bpf_local_storage_elem.
+				 */
+	struct rcu_head rcu;
+	raw_spinlock_t lock;	/* Protect adding/removing from the "list" */
+};
+
+/* U16_MAX is much more than enough for sk local storage
+ * considering a tcp_sock is ~2k.
+ */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE				       \
+	min_t(u32,                                                             \
+	      (KMALLOC_MAX_SIZE - MAX_BPF_STACK -                              \
+	       sizeof(struct bpf_local_storage_elem)),                         \
+	      (U16_MAX - sizeof(struct bpf_local_storage_elem)))
+
+#define SELEM(_SDATA)                                                          \
+	container_of((_SDATA), struct bpf_local_storage_elem, sdata)
+#define SDATA(_SELEM) (&(_SELEM)->sdata)
+
+#define BPF_LOCAL_STORAGE_CACHE_SIZE	16
+
+struct bpf_local_storage_cache {
+	spinlock_t idx_lock;
+	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+};
+
+#define DEFINE_BPF_STORAGE_CACHE(name)				\
+static struct bpf_local_storage_cache name = {			\
+	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
+}
+
+u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+				      u16 idx);
+
+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+			 struct bpf_local_storage_map *smap,
+			 bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type);
+
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+				   struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+				     struct bpf_local_storage_elem *selem,
+				     bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+		bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+			struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+			 void *value, u64 map_flags);
+
+#endif /* _BPF_LOCAL_STORAGE_H */
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 9e631b5466e3..3c516dd07caf 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -12,6 +12,7 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/bpf_local_storage.h>
 
 struct sock;
 
@@ -26,66 +27,6 @@ struct sk_buff;
 struct nlattr;
 struct sock;
 
-#define BPF_LOCAL_STORAGE_CACHE_SIZE	16
-
-struct bpf_local_storage_cache {
-	spinlock_t idx_lock;
-	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
-};
-
-#define DEFINE_BPF_STORAGE_CACHE(name)				\
-static struct bpf_local_storage_cache name = {			\
-	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
-}
-
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx);
-
-/* Helper functions for bpf_local_storage */
-int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
-
-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
-
-struct bpf_local_storage_data *
-bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
-			 struct bpf_local_storage_map *smap,
-			 bool cacheit_lockit);
-
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
-
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
-				    const struct btf *btf,
-				    const struct btf_type *key_type,
-				    const struct btf_type *value_type);
-
-void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
-				   struct bpf_local_storage_elem *selem);
-
-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
-				     struct bpf_local_storage_elem *selem,
-				     bool uncharge_omem);
-
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
-
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *selem);
-
-void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
-
-struct bpf_local_storage_elem *
-bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem);
-
-int
-bpf_local_storage_alloc(void *owner,
-			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem);
-
-struct bpf_local_storage_data *
-bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags);
-
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
 struct bpf_sk_storage_diag *
-- 
cgit v1.2.3


From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:17 +0200
Subject: bpf: Implement bpf_local_storage for inodes

Similar to bpf_local_storage for sockets, add local storage for inodes.
The life-cycle of storage is managed with the life-cycle of the inode.
i.e. the storage is destroyed along with the owning inode.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
security blob which are now stackable and can co-exist with other LSMs.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org
---
 include/linux/bpf_lsm.h   | 29 +++++++++++++++++++++++++++++
 include/linux/bpf_types.h |  3 +++
 include/uapi/linux/bpf.h  | 40 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 71 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index af74712af585..aaacb6aafc87 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -17,9 +17,28 @@
 #include <linux/lsm_hook_defs.h>
 #undef LSM_HOOK
 
+struct bpf_storage_blob {
+	struct bpf_local_storage __rcu *storage;
+};
+
+extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
+
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog);
 
+static inline struct bpf_storage_blob *bpf_inode(
+	const struct inode *inode)
+{
+	if (unlikely(!inode->i_security))
+		return NULL;
+
+	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
+}
+
+extern const struct bpf_func_proto bpf_inode_storage_get_proto;
+extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+void bpf_inode_storage_free(struct inode *inode);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
@@ -28,6 +47,16 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 	return -EOPNOTSUPP;
 }
 
+static inline struct bpf_storage_blob *bpf_inode(
+	const struct inode *inode)
+{
+	return NULL;
+}
+
+static inline void bpf_inode_storage_free(struct inode *inode)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a52a5688418e..2e6f568377f1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -107,6 +107,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
+#ifdef CONFIG_BPF_LSM
+BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+#endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2cbd137eed86..b6bfcd085a76 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP_HASH,
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
+	BPF_MAP_TYPE_INODE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3509,6 +3510,41 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from an *inode*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *inode* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ *		helper enforces the key must be an inode and the map must also
+ *		be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *inode* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *inode*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ *	Description
+ *		Delete a bpf_local_storage from an *inode*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3655,7 +3691,9 @@ union bpf_attr {
 	FN(get_task_stack),		\
 	FN(load_hdr_opt),		\
 	FN(store_hdr_opt),		\
-	FN(reserve_hdr_opt),
+	FN(reserve_hdr_opt),		\
+	FN(inode_storage_get),		\
+	FN(inode_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 30897832d8b97e93833fb52c0a02951db3692ed2 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:18 +0200
Subject: bpf: Allow local storage to be used from LSM programs

Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used
in LSM programs. These helpers are not used for tracing programs
(currently) as their usage is tied to the life-cycle of the object and
should only be used where the owning object won't be freed (when the
owning object is passed as an argument to the LSM hook). Thus, they
are safer to use in LSM hooks than tracing. Usage of local storage in
tracing programs will probably follow a per function based whitelist
approach.

Since the UAPI helper signature for bpf_sk_storage expect a bpf_sock,
it, leads to a compilation warning for LSM programs, it's also updated
to accept a void * pointer instead.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-7-kpsingh@chromium.org
---
 include/net/bpf_sk_storage.h | 2 ++
 include/uapi/linux/bpf.h     | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 3c516dd07caf..119f4c9c3a9c 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto sk_storage_get_btf_proto;
+extern const struct bpf_func_proto sk_storage_delete_btf_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b6bfcd085a76..0e1cdf806fe1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2808,7 +2808,7 @@ union bpf_attr {
  *
  *		**-ERANGE** if resulting value was out of range.
  *
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
  *	Description
  *		Get a bpf-local-storage from a *sk*.
  *
@@ -2824,6 +2824,9 @@ union bpf_attr {
  *		"type". The bpf-local-storage "type" (i.e. the *map*) is
  *		searched against all bpf-local-storages residing at *sk*.
  *
+ *		*sk* is a kernel **struct sock** pointer for LSM program.
+ *		*sk* is a **struct bpf_sock** pointer for other program types.
+ *
  *		An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
  *		used such that a new bpf-local-storage will be
  *		created if one does not exist.  *value* can be used
@@ -2836,7 +2839,7 @@ union bpf_attr {
  *		**NULL** if not found or there was an error in adding
  *		a new bpf-local-storage.
  *
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
  *	Description
  *		Delete a bpf-local-storage from a *sk*.
  *	Return
-- 
cgit v1.2.3


From 6298399bfc101f8e8cf35a916f26aa32bdf04278 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 25 Aug 2020 21:21:13 +0200
Subject: bpf: Move btf_resolve_size into __btf_resolve_size

Moving btf_resolve_size into __btf_resolve_size and
keeping btf_resolve_size public with just first 3
arguments, because the rest of the arguments are not
used by outside callers.

Following changes are adding more arguments, which
are not useful to outside callers. They will be added
to the __btf_resolve_size function.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200825192124.710397-4-jolsa@kernel.org
---
 include/linux/btf.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8b81fbb4497c..a9af5e7a7ece 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -64,8 +64,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
 						 u32 id, u32 *res_id);
 const struct btf_type *
 btf_resolve_size(const struct btf *btf, const struct btf_type *type,
-		 u32 *type_size, const struct btf_type **elem_type,
-		 u32 *total_nelems);
+		 u32 *type_size);
 
 #define for_each_member(i, struct_type, member)			\
 	for (i = 0, member = btf_type_member(struct_type);	\
-- 
cgit v1.2.3


From faaf4a790d93794b46d67e2fd69b8e5c8cae2d41 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 25 Aug 2020 21:21:18 +0200
Subject: bpf: Add btf_struct_ids_match function

Adding btf_struct_ids_match function to check if given address provided
by BTF object + offset is also address of another nested BTF object.

This allows to pass an argument to helper, which is defined via parent
BTF object + offset, like for bpf_d_path (added in following changes):

  SEC("fentry/filp_close")
  int BPF_PROG(prog_close, struct file *file, void *id)
  {
    ...
    ret = bpf_d_path(&file->f_path, ...

The first bpf_d_path argument is hold by verifier as BTF file object
plus offset of f_path member.

The btf_struct_ids_match function will walk the struct file object and
check if there's nested struct path object on the given offset.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200825192124.710397-9-jolsa@kernel.org
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8c443b93ac11..540f5e6c3788 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1358,6 +1358,8 @@ int btf_struct_access(struct bpf_verifier_log *log,
 		      const struct btf_type *t, int off, int size,
 		      enum bpf_access_type atype,
 		      u32 *next_btf_id);
+bool btf_struct_ids_match(struct bpf_verifier_log *log,
+			  int off, u32 id, u32 need_type_id);
 int btf_resolve_helper_id(struct bpf_verifier_log *log,
 			  const struct bpf_func_proto *fn, int);
 
-- 
cgit v1.2.3


From eae2e83e62633a2659e3bc690facba1c2fc9c45b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 25 Aug 2020 21:21:19 +0200
Subject: bpf: Add BTF_SET_START/END macros

Adding support to define sorted set of BTF ID values.

Following defines sorted set of BTF ID values:

  BTF_SET_START(btf_allowlist_d_path)
  BTF_ID(func, vfs_truncate)
  BTF_ID(func, vfs_fallocate)
  BTF_ID(func, dentry_open)
  BTF_ID(func, vfs_getattr)
  BTF_ID(func, filp_close)
  BTF_SET_END(btf_allowlist_d_path)

It defines following 'struct btf_id_set' variable to access
values and count:

  struct btf_id_set btf_allowlist_d_path;

Adding 'allowed' callback to struct bpf_func_proto, to allow
verifier the check on allowed callers.

Adding btf_id_set_contains function, which will be used by
allowed callbacks to verify the caller's BTF ID value is
within allowed set.

Also removing extra '\' in __BTF_ID_LIST macro.

Added BTF_SET_START_GLOBAL macro for global sets.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200825192124.710397-10-jolsa@kernel.org
---
 include/linux/bpf.h     |  4 ++++
 include/linux/btf_ids.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 540f5e6c3788..a6131d95e31e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -317,6 +317,7 @@ struct bpf_func_proto {
 						    * for this argument.
 						    */
 	int *ret_btf_id; /* return value btf_id */
+	bool (*allowed)(const struct bpf_prog *prog);
 };
 
 /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -1878,4 +1879,7 @@ enum bpf_text_poke_type {
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		       void *addr1, void *addr2);
 
+struct btf_id_set;
+bool btf_id_set_contains(struct btf_id_set *set, u32 id);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 4867d549e3c1..210b086188a3 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -3,6 +3,11 @@
 #ifndef _LINUX_BTF_IDS_H
 #define _LINUX_BTF_IDS_H
 
+struct btf_id_set {
+	u32 cnt;
+	u32 ids[];
+};
+
 #ifdef CONFIG_DEBUG_INFO_BTF
 
 #include <linux/compiler.h> /* for __PASTE */
@@ -62,7 +67,7 @@ asm(							\
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
 "." #scope " " #name ";                        \n"	\
 #name ":;                                      \n"	\
-".popsection;                                  \n");	\
+".popsection;                                  \n");
 
 #define BTF_ID_LIST(name)				\
 __BTF_ID_LIST(name, local)				\
@@ -88,12 +93,56 @@ asm(							\
 ".zero 4                                       \n"	\
 ".popsection;                                  \n");
 
+/*
+ * The BTF_SET_START/END macros pair defines sorted list of
+ * BTF IDs plus its members count, with following layout:
+ *
+ * BTF_SET_START(list)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ * BTF_SET_END(list)
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__type1__name1__3:
+ * .zero 4
+ * __BTF_ID__type2__name2__4:
+ * .zero 4
+ *
+ */
+#define __BTF_SET_START(name, scope)			\
+asm(							\
+".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
+"." #scope " __BTF_ID__set__" #name ";         \n"	\
+"__BTF_ID__set__" #name ":;                    \n"	\
+".zero 4                                       \n"	\
+".popsection;                                  \n");
+
+#define BTF_SET_START(name)				\
+__BTF_ID_LIST(name, local)				\
+__BTF_SET_START(name, local)
+
+#define BTF_SET_START_GLOBAL(name)			\
+__BTF_ID_LIST(name, globl)				\
+__BTF_SET_START(name, globl)
+
+#define BTF_SET_END(name)				\
+asm(							\
+".pushsection " BTF_IDS_SECTION ",\"a\";      \n"	\
+".size __BTF_ID__set__" #name ", .-" #name "  \n"	\
+".popsection;                                 \n");	\
+extern struct btf_id_set name;
+
 #else
 
 #define BTF_ID_LIST(name) static u32 name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
 #define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
 
-- 
cgit v1.2.3


From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 25 Aug 2020 21:21:20 +0200
Subject: bpf: Add d_path helper

Adding d_path helper function that returns full path for
given 'struct path' object, which needs to be the kernel
BTF 'path' object. The path is returned in buffer provided
'buf' of size 'sz' and is zero terminated.

  bpf_d_path(&file->f_path, buf, size);

The helper calls directly d_path function, so there's only
limited set of function it can be called from. Adding just
very modest set for the start.

Updating also bpf.h tools uapi header and adding 'path' to
bpf_helpers_doc.py script.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org
---
 include/uapi/linux/bpf.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0e1cdf806fe1..0388bc0200b0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3513,6 +3513,7 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ *
  * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
  *	Description
  *		Get a bpf_local_storage from an *inode*.
@@ -3548,6 +3549,18 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ *	Description
+ *		Return full path for given 'struct path' object, which
+ *		needs to be the kernel BTF 'path' object. The path is
+ *		returned in the provided buffer 'buf' of size 'sz' and
+ *		is zero terminated.
+ *
+ *	Return
+ *		On success, the strictly positive length of the string,
+ *		including the trailing NUL character. On error, a negative
+ *		value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3697,6 +3710,7 @@ union bpf_attr {
 	FN(reserve_hdr_opt),		\
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
+	FN(d_path),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 24da79902efc4a8443fae09e6c8e25b515bd8db2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 26 Aug 2020 06:50:16 -0700
Subject: inet: remove inet_sk_copy_descendant()

This is no longer used, SCTP now uses a private helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h    | 11 -----------
 include/net/inet_sock.h |  7 -------
 2 files changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a44789d027cc..bac8f4fffbd6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -345,17 +345,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	return (struct raw6_sock *)sk;
 }
 
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
-					   const struct sock *sk_from)
-{
-	int ancestor_size = sizeof(struct inet_sock);
-
-	if (sk_from->sk_family == PF_INET6)
-		ancestor_size += sizeof(struct ipv6_pinfo);
-
-	__inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
-}
-
 #define __ipv6_only_sock(sk)	(sk->sk_ipv6only)
 #define ipv6_only_sock(sk)	(__ipv6_only_sock(sk))
 #define ipv6_sk_rxinfo(sk)	((sk)->sk_family == PF_INET6 && \
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a3702d1d4875..89163ef8cf4b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -296,13 +296,6 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
 	memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
 	       sk_from->sk_prot->obj_size - ancestor_size);
 }
-#if !(IS_ENABLED(CONFIG_IPV6))
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
-					   const struct sock *sk_from)
-{
-	__inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
-}
-#endif
 
 int inet_sk_rebuild_header(struct sock *sk);
 
-- 
cgit v1.2.3


From f468f21b7af0fa472ff8ff70f10b9b4995ef7eb3 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 26 Aug 2020 15:54:16 +0300
Subject: net: Take common prefetch code structure into a function

Many device drivers use the same prefetch code structure to
deal with small L1 cacheline size.
Take this code into a function and call it from the drivers.

Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0e303f6603f..b8abe1d7aa0b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2193,6 +2193,22 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+static inline void net_prefetch(void *p)
+{
+	prefetch(p);
+#if L1_CACHE_BYTES < 128
+	prefetch((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
+static inline void net_prefetchw(void *p)
+{
+	prefetchw(p);
+#if L1_CACHE_BYTES < 128
+	prefetchw((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
 void netdev_unbind_sb_channel(struct net_device *dev,
 			      struct net_device *sb_dev);
 int netdev_bind_sb_channel_queue(struct net_device *dev,
-- 
cgit v1.2.3


From 493a0ebd804c986e6bd207603c5e1ca748470d3d Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 13 Apr 2020 09:20:53 -0700
Subject: nl80211: fix PORT_AUTHORIZED wording to reflect behavior

The CMD_PORT_AUTHORIZED event was described as an event which indicated
a successfully completed 4-way handshake. But the behavior was
not as advertized. The only driver which uses this is brcmfmac, and
this driver only sends the event after a successful 802.1X-FT roam.

This prevents userspace applications from knowing if the 4-way completed
on:

1. Normal 802.1X connects
2. Normal PSK connections
3. FT-PSK roams

wpa_supplicant handles this incorrect behavior by just completing
the connection after association, before the 4-way has completed.
If the 4-way ends up failing it disconnects at that point.

Since this behavior appears to be expected (wpa_s handles it this
way) I have changed the wording in the API description to reflect the
actual behavior.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Link: https://lore.kernel.org/r/20200413162053.3711-1-prestwoj@gmail.com
[fix spelling of 802.1X throughout ...]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 631f3a997b3c..8cc2b825e4e4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -647,13 +647,9 @@
  *	authentication/association or not receiving a response from the AP.
  *	Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
  *	well to remain backwards compatible.
- *	When establishing a security association, drivers that support 4 way
- *	handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
- *	the 4 way handshake is completed successfully.
  * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
- *	When a security association was established with the new AP (e.g. if
- *	the FT protocol was used for roaming or the driver completed the 4 way
- *	handshake), this event should be followed by an
+ *	When a security association was established on an 802.1X network using
+ *	fast transition, this event should be followed by an
  *	%NL80211_CMD_PORT_AUTHORIZED event.
  * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
  *	userspace that a connection was dropped by the AP or due to other
@@ -1067,13 +1063,11 @@
  * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
  *	configured PMK for the authenticator address identified by
  *	%NL80211_ATTR_MAC.
- * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
- *	handshake was completed successfully by the driver. The BSSID is
- *	specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake
- *	offload should send this event after indicating 802.11 association with
- *	%NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed
- *	%NL80211_CMD_DISCONNECT should be indicated instead.
- *
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was
+ *	completed successfully. Drivers that support 4 way handshake offload
+ *	should send this event after indicating 802.1X FT assocation with
+ *	%NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT
+ *	should be indicated instead.
  * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
  *	and RX notification.  This command is used both as a request to transmit
  *	a control port frame and as a notification that a control port frame
-- 
cgit v1.2.3


From eb89a6a6b7a1af2d9c8d83ee44fa67700d6337e7 Mon Sep 17 00:00:00 2001
From: Miles Hu <milehu@codeaurora.org>
Date: Tue, 4 Aug 2020 10:16:29 +0200
Subject: nl80211: add support for setting fixed HE rate/gi/ltf

This patch adds the nl80211 structs, definitions, policies and parsing
code required to pass fixed HE rate, GI and LTF settings.

Signed-off-by: Miles Hu <milehu@codeaurora.org>
Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200804081630.2013619-1-john@phrozen.org
[fix comment]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 +++
 include/uapi/linux/nl80211.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d9e6b9fbd95b..c9bce9bba511 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -678,7 +678,10 @@ struct cfg80211_bitrate_mask {
 		u32 legacy;
 		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
 		u16 vht_mcs[NL80211_VHT_NSS_MAX];
+		u16 he_mcs[NL80211_HE_NSS_MAX];
 		enum nl80211_txrate_gi gi;
+		enum nl80211_he_gi he_gi;
+		enum nl80211_he_ltf he_ltf;
 	} control[NUM_NL80211_BANDS];
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8cc2b825e4e4..1a4b922f489f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3180,6 +3180,18 @@ enum nl80211_he_gi {
 	NL80211_RATE_INFO_HE_GI_3_2,
 };
 
+/**
+ * enum nl80211_he_ltf - HE long training field
+ * @NL80211_RATE_INFO_HE_1xLTF: 3.2 usec
+ * @NL80211_RATE_INFO_HE_2xLTF: 6.4 usec
+ * @NL80211_RATE_INFO_HE_4xLTF: 12.8 usec
+ */
+enum nl80211_he_ltf {
+	NL80211_RATE_INFO_HE_1XLTF,
+	NL80211_RATE_INFO_HE_2XLTF,
+	NL80211_RATE_INFO_HE_4XLTF,
+};
+
 /**
  * enum nl80211_he_ru_alloc - HE RU allocation values
  * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
@@ -4735,6 +4747,10 @@ enum nl80211_key_attributes {
  * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
  *	see &struct nl80211_txrate_vht
  * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
+ * @NL80211_TXRATE_HE: HE rates allowed for TX rate selection,
+ *	see &struct nl80211_txrate_he
+ * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us.
+ * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF.
  * @__NL80211_TXRATE_AFTER_LAST: internal
  * @NL80211_TXRATE_MAX: highest TX rate attribute
  */
@@ -4744,6 +4760,9 @@ enum nl80211_tx_rate_attributes {
 	NL80211_TXRATE_HT,
 	NL80211_TXRATE_VHT,
 	NL80211_TXRATE_GI,
+	NL80211_TXRATE_HE,
+	NL80211_TXRATE_HE_GI,
+	NL80211_TXRATE_HE_LTF,
 
 	/* keep last */
 	__NL80211_TXRATE_AFTER_LAST,
@@ -4761,6 +4780,15 @@ struct nl80211_txrate_vht {
 	__u16 mcs[NL80211_VHT_NSS_MAX];
 };
 
+#define NL80211_HE_NSS_MAX		8
+/**
+ * struct nl80211_txrate_he - HE MCS/NSS txrate bitmap
+ * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.)
+ */
+struct nl80211_txrate_he {
+	__u16 mcs[NL80211_HE_NSS_MAX];
+};
+
 enum nl80211_txrate_gi {
 	NL80211_TXRATE_DEFAULT_GI,
 	NL80211_TXRATE_FORCE_SGI,
-- 
cgit v1.2.3


From 00c207edfb2bff9cf03a8f21e57c9c752a1d9f16 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 11 Aug 2020 10:01:03 +0200
Subject: nl80211: rename csa counter attributes countdown counters

We want to reuse the attributes for other counters such as BSS color
change. Rename them to more generic names.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200811080107.3615705-1-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1a4b922f489f..ec96d5fe0e05 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2076,10 +2076,10 @@ enum nl80211_commands {
  *	operation).
  * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
  *	for the time while performing a channel switch.
- * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
- *	switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
- * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
- *	switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
+ * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel
+ *	switch or color change counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CNTDWN_OFFS_PRESP: An array of offsets (u16) to the channel
+ *	switch or color change counters in the probe response (%NL80211_ATTR_PROBE_RESP).
  *
  * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
  *	As specified in the &enum nl80211_rxmgmt_flags.
@@ -2815,8 +2815,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_CH_SWITCH_COUNT,
 	NL80211_ATTR_CH_SWITCH_BLOCK_TX,
 	NL80211_ATTR_CSA_IES,
-	NL80211_ATTR_CSA_C_OFF_BEACON,
-	NL80211_ATTR_CSA_C_OFF_PRESP,
+	NL80211_ATTR_CNTDWN_OFFS_BEACON,
+	NL80211_ATTR_CNTDWN_OFFS_PRESP,
 
 	NL80211_ATTR_RXMGMT_FLAGS,
 
@@ -3003,6 +3003,8 @@ enum nl80211_attrs {
 #define	NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
 #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
 #define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
+#define NL80211_ATTR_CSA_C_OFF_BEACON NL80211_ATTR_CNTDWN_OFFS_BEACON
+#define NL80211_ATTR_CSA_C_OFF_PRESP NL80211_ATTR_CNTDWN_OFFS_PRESP
 
 /*
  * Allow user space programs to use #ifdef on new attributes by defining them
-- 
cgit v1.2.3


From 8552a434b6a05cc38006733afe6a239ad4d600a2 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 11 Aug 2020 10:01:04 +0200
Subject: mac80211: rename csa counters to countdown counters

We want to reuse the functions and structs for other counters such as BSS
color change. Rename them to more generic names.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200811080107.3615705-2-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 66e2bfd165e8..ec148b3e9c41 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3736,7 +3736,7 @@ enum ieee80211_reconfig_type {
  *	decremented, and when they reach 1 the driver must call
  *	ieee80211_csa_finish(). Drivers which use ieee80211_beacon_get()
  *	get the csa counter decremented by mac80211, but must check if it is
- *	1 using ieee80211_csa_is_complete() after the beacon has been
+ *	1 using ieee80211_beacon_counter_is_complete() after the beacon has been
  *	transmitted and then call ieee80211_csa_finish().
  *	If the CSA count starts as zero or 1, this function will not be called,
  *	since there won't be any time to beacon before the switch anyway.
@@ -4763,21 +4763,21 @@ void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
  */
 void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
 
-#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+#define IEEE80211_MAX_CNTDWN_COUNTERS_NUM 2
 
 /**
  * struct ieee80211_mutable_offsets - mutable beacon offsets
  * @tim_offset: position of TIM element
  * @tim_length: size of TIM element
- * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
- *	to CSA counters.  This array can contain zero values which
+ * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets
+ *	to countdown counters.  This array can contain zero values which
  *	should be ignored.
  */
 struct ieee80211_mutable_offsets {
 	u16 tim_offset;
 	u16 tim_length;
 
-	u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
+	u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
 };
 
 /**
@@ -4846,31 +4846,31 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 }
 
 /**
- * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  *
- * The csa counter should be updated after each beacon transmission.
+ * The beacon counter should be updated after each beacon transmission.
  * This function is called implicitly when
  * ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
  * beacon frames are generated by the device, the driver should call this
- * function after each beacon transmission to sync mac80211's csa counters.
+ * function after each beacon transmission to sync mac80211's beacon countdown.
  *
- * Return: new csa counter value
+ * Return: new countdown value
  */
-u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
 
 /**
- * ieee80211_csa_set_counter - request mac80211 to set csa counter
+ * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  * @counter: the new value for the counter
  *
- * The csa counter can be changed by the device, this API should be
+ * The beacon countdown can be changed by the device, this API should be
  * used by the device driver to update csa counter in mac80211.
  *
- * It should never be used together with ieee80211_csa_update_counter(),
+ * It should never be used together with ieee80211_beacon_update_cntdwn(),
  * as it will cause a race condition around the counter value.
  */
-void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
+void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
 
 /**
  * ieee80211_csa_finish - notify mac80211 about channel switch
@@ -4883,13 +4883,12 @@ void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter);
 void ieee80211_csa_finish(struct ieee80211_vif *vif);
 
 /**
- * ieee80211_csa_is_complete - find out if counters reached 1
+ * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  *
- * This function returns whether the channel switch counters reached zero.
+ * This function returns whether the countdown reached zero.
  */
-bool ieee80211_csa_is_complete(struct ieee80211_vif *vif);
-
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
 
 /**
  * ieee80211_proberesp_get - retrieve a Probe Response template
-- 
cgit v1.2.3


From 2831a631022eed6e3f800f08892132c6edde652c Mon Sep 17 00:00:00 2001
From: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Date: Mon, 17 Aug 2020 02:33:15 -0500
Subject: nl80211: support SAE authentication offload in AP mode

Let drivers advertise support for AP-mode SAE authentication offload
with a new NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag.

Signed-off-by: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Link: https://lore.kernel.org/r/20200817073316.33402-4-stanley.hsu@cypress.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ec96d5fe0e05..0584e0d349f0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -252,9 +252,13 @@
  * DOC: SAE authentication offload
  *
  * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they
- * support offloading SAE authentication for WPA3-Personal networks. In
- * %NL80211_CMD_CONNECT the password for SAE should be specified using
- * %NL80211_ATTR_SAE_PASSWORD.
+ * support offloading SAE authentication for WPA3-Personal networks in station
+ * mode. Similarly @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag can be set by
+ * drivers indicating the offload support in AP mode.
+ *
+ * The password for SAE should be specified using %NL80211_ATTR_SAE_PASSWORD in
+ * %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP for station and AP mode
+ * respectively.
  */
 
 /**
@@ -5845,6 +5849,9 @@ enum nl80211_feature_flags {
  *	handshake with PSK in AP mode (PSK is passed as part of the start AP
  *	command).
  *
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication
+ *	in AP mode (SAE password is passed as part of the start AP command).
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5902,6 +5909,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS,
 	NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION,
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
+	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 2fa4e4b799e191530edbae4b96b85d4486e55053 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 27 Aug 2020 04:00:28 +0200
Subject: net: pcs: Move XPCS into new PCS subdirectory

Create drivers/net/pcs and move the Synopsys DesignWare XPCS into the
new directory. Move the header file into a subdirectory
include/linux/pcs

Start a naming convention of all PCS files use the prefix pcs-, and
rename the XPCS files to fit.

v2:
Add include/linux/pcs

v4:
Fix include path in stmmac.
Remove PCS_DEVICES to avoid new prompts

Cc: Jose Abreu <Jose.Abreu@synopsys.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio-xpcs.h    | 41 -----------------------------------------
 include/linux/pcs/pcs-xpcs.h | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 41 deletions(-)
 delete mode 100644 include/linux/mdio-xpcs.h
 create mode 100644 include/linux/pcs/pcs-xpcs.h

(limited to 'include')

diff --git a/include/linux/mdio-xpcs.h b/include/linux/mdio-xpcs.h
deleted file mode 100644
index 9a841aa5982d..000000000000
--- a/include/linux/mdio-xpcs.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates.
- * Synopsys DesignWare XPCS helpers
- */
-
-#ifndef __LINUX_MDIO_XPCS_H
-#define __LINUX_MDIO_XPCS_H
-
-#include <linux/phy.h>
-#include <linux/phylink.h>
-
-struct mdio_xpcs_args {
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
-	struct mii_bus *bus;
-	int addr;
-};
-
-struct mdio_xpcs_ops {
-	int (*validate)(struct mdio_xpcs_args *xpcs,
-			unsigned long *supported,
-			struct phylink_link_state *state);
-	int (*config)(struct mdio_xpcs_args *xpcs,
-		      const struct phylink_link_state *state);
-	int (*get_state)(struct mdio_xpcs_args *xpcs,
-			 struct phylink_link_state *state);
-	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
-		       phy_interface_t interface);
-	int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
-};
-
-#if IS_ENABLED(CONFIG_MDIO_XPCS)
-struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
-#else
-static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
-{
-	return NULL;
-}
-#endif
-
-#endif /* __LINUX_MDIO_XPCS_H */
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
new file mode 100644
index 000000000000..351c1c9aedc5
--- /dev/null
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare XPCS helpers
+ */
+
+#ifndef __LINUX_PCS_XPCS_H
+#define __LINUX_PCS_XPCS_H
+
+#include <linux/phy.h>
+#include <linux/phylink.h>
+
+struct mdio_xpcs_args {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+	struct mii_bus *bus;
+	int addr;
+};
+
+struct mdio_xpcs_ops {
+	int (*validate)(struct mdio_xpcs_args *xpcs,
+			unsigned long *supported,
+			struct phylink_link_state *state);
+	int (*config)(struct mdio_xpcs_args *xpcs,
+		      const struct phylink_link_state *state);
+	int (*get_state)(struct mdio_xpcs_args *xpcs,
+			 struct phylink_link_state *state);
+	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
+		       phy_interface_t interface);
+	int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
+};
+
+#if IS_ENABLED(CONFIG_PCS_XPCS)
+struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
+#else
+static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
+{
+	return NULL;
+}
+#endif
+
+#endif /* __LINUX_PCS_XPCS_H */
-- 
cgit v1.2.3


From fcba68bd75bb1d42b3aec7f471d382a9e639a672 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 27 Aug 2020 04:00:29 +0200
Subject: net/phy/mdio-i2c: Move header file to include/linux/mdio

In preparation for moving all MDIO drivers into drivers/net/mdio, move
the mdio-i2c header file into include/linux/mdio so it can be used by
both the MDIO driver and the SFP code which instantiates I2C MDIO
busses.

v2:
Add include/linux/mdio

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio/mdio-i2c.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 include/linux/mdio/mdio-i2c.h

(limited to 'include')

diff --git a/include/linux/mdio/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h
new file mode 100644
index 000000000000..b1d27f7cd23f
--- /dev/null
+++ b/include/linux/mdio/mdio-i2c.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MDIO I2C bridge
+ *
+ * Copyright (C) 2015 Russell King
+ */
+#ifndef MDIO_I2C_H
+#define MDIO_I2C_H
+
+struct device;
+struct i2c_adapter;
+struct mii_bus;
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c);
+
+#endif
-- 
cgit v1.2.3


From 232e15e1d7ddb191c28248cb681f4544c0ff1c54 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 27 Aug 2020 04:00:30 +0200
Subject: net: xgene: Move shared header file into include/linux

This header file is currently included into the ethernet driver via a
relative path into the PHY subsystem. This is bad practice, and causes
issues for the upcoming move of the MDIO driver. Move the header file
into include/linux to clean this up.

v2:
Move header to include/linux/mdio

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio/mdio-xgene.h | 130 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 include/linux/mdio/mdio-xgene.h

(limited to 'include')

diff --git a/include/linux/mdio/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h
new file mode 100644
index 000000000000..8af93ada8b64
--- /dev/null
+++ b/include/linux/mdio/mdio-xgene.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Applied Micro X-Gene SoC MDIO Driver
+ *
+ * Copyright (c) 2016, Applied Micro Circuits Corporation
+ * Author: Iyappan Subramanian <isubramanian@apm.com>
+ */
+
+#ifndef __MDIO_XGENE_H__
+#define __MDIO_XGENE_H__
+
+#define BLOCK_XG_MDIO_CSR_OFFSET	0x5000
+#define BLOCK_DIAG_CSR_OFFSET		0xd000
+#define XGENET_CONFIG_REG_ADDR		0x20
+
+#define MAC_ADDR_REG_OFFSET		0x00
+#define MAC_COMMAND_REG_OFFSET		0x04
+#define MAC_WRITE_REG_OFFSET		0x08
+#define MAC_READ_REG_OFFSET		0x0c
+#define MAC_COMMAND_DONE_REG_OFFSET	0x10
+
+#define CLKEN_OFFSET			0x08
+#define SRST_OFFSET			0x00
+
+#define MENET_CFG_MEM_RAM_SHUTDOWN_ADDR	0x70
+#define MENET_BLOCK_MEM_RDY_ADDR	0x74
+
+#define MAC_CONFIG_1_ADDR		0x00
+#define MII_MGMT_COMMAND_ADDR		0x24
+#define MII_MGMT_ADDRESS_ADDR		0x28
+#define MII_MGMT_CONTROL_ADDR		0x2c
+#define MII_MGMT_STATUS_ADDR		0x30
+#define MII_MGMT_INDICATORS_ADDR	0x34
+#define SOFT_RESET			BIT(31)
+
+#define MII_MGMT_CONFIG_ADDR            0x20
+#define MII_MGMT_COMMAND_ADDR           0x24
+#define MII_MGMT_ADDRESS_ADDR           0x28
+#define MII_MGMT_CONTROL_ADDR           0x2c
+#define MII_MGMT_STATUS_ADDR            0x30
+#define MII_MGMT_INDICATORS_ADDR        0x34
+
+#define MIIM_COMMAND_ADDR               0x20
+#define MIIM_FIELD_ADDR                 0x24
+#define MIIM_CONFIGURATION_ADDR         0x28
+#define MIIM_LINKFAILVECTOR_ADDR        0x2c
+#define MIIM_INDICATOR_ADDR             0x30
+#define MIIMRD_FIELD_ADDR               0x34
+
+#define MDIO_CSR_OFFSET			0x5000
+
+#define REG_ADDR_POS			0
+#define REG_ADDR_LEN			5
+#define PHY_ADDR_POS			8
+#define PHY_ADDR_LEN			5
+
+#define HSTMIIMWRDAT_POS		0
+#define HSTMIIMWRDAT_LEN		16
+#define HSTPHYADX_POS			23
+#define HSTPHYADX_LEN			5
+#define HSTREGADX_POS			18
+#define HSTREGADX_LEN			5
+#define HSTLDCMD			BIT(3)
+#define HSTMIIMCMD_POS			0
+#define HSTMIIMCMD_LEN			3
+
+#define BUSY_MASK			BIT(0)
+#define READ_CYCLE_MASK			BIT(0)
+
+enum xgene_enet_cmd {
+	XGENE_ENET_WR_CMD = BIT(31),
+	XGENE_ENET_RD_CMD = BIT(30)
+};
+
+enum {
+	MIIM_CMD_IDLE,
+	MIIM_CMD_LEGACY_WRITE,
+	MIIM_CMD_LEGACY_READ,
+};
+
+enum xgene_mdio_id {
+	XGENE_MDIO_RGMII = 1,
+	XGENE_MDIO_XFI
+};
+
+struct xgene_mdio_pdata {
+	struct clk *clk;
+	struct device *dev;
+	void __iomem *mac_csr_addr;
+	void __iomem *diag_csr_addr;
+	void __iomem *mdio_csr_addr;
+	struct mii_bus *mdio_bus;
+	int mdio_id;
+	spinlock_t mac_lock; /* mac lock */
+};
+
+/* Set the specified value into a bit-field defined by its starting position
+ * and length within a single u64.
+ */
+static inline u64 xgene_enet_set_field_value(int pos, int len, u64 val)
+{
+	return (val & ((1ULL << len) - 1)) << pos;
+}
+
+#define SET_VAL(field, val) \
+		xgene_enet_set_field_value(field ## _POS, field ## _LEN, val)
+
+#define SET_BIT(field) \
+		xgene_enet_set_field_value(field ## _POS, 1, 1)
+
+/* Get the value from a bit-field defined by its starting position
+ * and length within the specified u64.
+ */
+static inline u64 xgene_enet_get_field_value(int pos, int len, u64 src)
+{
+	return (src >> pos) & ((1ULL << len) - 1);
+}
+
+#define GET_VAL(field, src) \
+		xgene_enet_get_field_value(field ## _POS, field ## _LEN, src)
+
+#define GET_BIT(field, src) \
+		xgene_enet_get_field_value(field ## _POS, 1, src)
+
+u32 xgene_mdio_rd_mac(struct xgene_mdio_pdata *pdata, u32 rd_addr);
+void xgene_mdio_wr_mac(struct xgene_mdio_pdata *pdata, u32 wr_addr, u32 data);
+int xgene_mdio_rgmii_read(struct mii_bus *bus, int phy_id, int reg);
+int xgene_mdio_rgmii_write(struct mii_bus *bus, int phy_id, int reg, u16 data);
+struct phy_device *xgene_enet_phy_register(struct mii_bus *bus, int phy_addr);
+
+#endif  /* __MDIO_XGENE_H__ */
-- 
cgit v1.2.3


From 50aba46c234ea6ab3134cebb5ab27885f33a3e5d Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 27 Aug 2020 14:19:23 +0200
Subject: gtp: add notification mechanism

Like all other network functions, let's notify gtp context on creation and
deletion.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Gabriel Ganne <gabriel.ganne@6wind.com>
Acked-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/gtp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index c7d66755d212..79f9191bbb24 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -2,6 +2,8 @@
 #ifndef _UAPI_LINUX_GTP_H_
 #define _UAPI_LINUX_GTP_H_
 
+#define GTP_GENL_MCGRP_NAME	"gtp"
+
 enum gtp_genl_cmds {
 	GTP_CMD_NEWPDP,
 	GTP_CMD_DELPDP,
-- 
cgit v1.2.3


From b0c9eb37817943840a1a82dbc998c491609a0afd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Aug 2020 22:19:22 -0700
Subject: bpf: Make bpf_link_info.iter similar to bpf_iter_link_info

bpf_link_info.iter is used by link_query to return bpf_iter_link_info
to user space. Fields may be different, e.g., map_fd vs. map_id, so
we cannot reuse the exact structure. But make them similar, e.g.,

  struct bpf_link_info {
     /* common fields */
     union {
	struct { ... } raw_tracepoint;
	struct { ... } tracing;
	...
	struct {
	    /* common fields for iter */
	    union {
		struct {
		    __u32 map_id;
		} map;
		/* other structs for other targets */
	    };
	};
    };
 };

so the structure is extensible the same way as bpf_iter_link_info.

Fixes: 6b0a249a301e ("bpf: Implement link_query for bpf iterators")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200828051922.758950-1-yhs@fb.com
---
 include/uapi/linux/bpf.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0388bc0200b0..ef7af384f5ee 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4251,8 +4251,10 @@ struct bpf_link_info {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
 			union {
-				__u32 map_id;
-			} map;
+				struct {
+					__u32 map_id;
+				} map;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
-- 
cgit v1.2.3


From f4d05259213ff1e91f767c91dcab455f68308fac Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 27 Aug 2020 18:18:06 -0700
Subject: bpf: Add map_meta_equal map ops

Some properties of the inner map is used in the verification time.
When an inner map is inserted to an outer map at runtime,
bpf_map_meta_equal() is currently used to ensure those properties
of the inserting inner map stays the same as the verification
time.

In particular, the current bpf_map_meta_equal() checks max_entries which
turns out to be too restrictive for most of the maps which do not use
max_entries during the verification time.  It limits the use case that
wants to replace a smaller inner map with a larger inner map.  There are
some maps do use max_entries during verification though.  For example,
the map_gen_lookup in array_map_ops uses the max_entries to generate
the inline lookup code.

To accommodate differences between maps, the map_meta_equal is added
to bpf_map_ops.  Each map-type can decide what to check when its
map is used as an inner map during runtime.

Also, some map types cannot be used as an inner map and they are
currently black listed in bpf_map_meta_alloc() in map_in_map.c.
It is not unusual that the new map types may not aware that such
blacklist exists.  This patch enforces an explicit opt-in
and only allows a map to be used as an inner map if it has
implemented the map_meta_equal ops.  It is based on the
discussion in [1].

All maps that support inner map has its map_meta_equal points
to bpf_map_meta_equal in this patch.  A later patch will
relax the max_entries check for most maps.  bpf_types.h
counts 28 map types.  This patch adds 23 ".map_meta_equal"
by using coccinelle.  -5 for
	BPF_MAP_TYPE_PROG_ARRAY
	BPF_MAP_TYPE_(PERCPU)_CGROUP_STORAGE
	BPF_MAP_TYPE_STRUCT_OPS
	BPF_MAP_TYPE_ARRAY_OF_MAPS
	BPF_MAP_TYPE_HASH_OF_MAPS

The "if (inner_map->inner_map_meta)" check in bpf_map_meta_alloc()
is moved such that the same error is returned.

[1]: https://lore.kernel.org/bpf/20200522022342.899756-1-kafai@fb.com/

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200828011806.1970400-1-kafai@fb.com
---
 include/linux/bpf.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a6131d95e31e..dbba82a80087 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -112,6 +112,19 @@ struct bpf_map_ops {
 	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
 					   void *owner, u32 size);
 	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+
+	/* map_meta_equal must be implemented for maps that can be
+	 * used as an inner map.  It is a runtime check to ensure
+	 * an inner map can be inserted to an outer map.
+	 *
+	 * Some properties of the inner map has been used during the
+	 * verification time.  When inserting an inner map at the runtime,
+	 * map_meta_equal has to ensure the inserting map has the same
+	 * properties that the verifier has used earlier.
+	 */
+	bool (*map_meta_equal)(const struct bpf_map *meta0,
+			       const struct bpf_map *meta1);
+
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
@@ -235,6 +248,9 @@ int map_check_no_btf(const struct bpf_map *map,
 		     const struct btf_type *key_type,
 		     const struct btf_type *value_type);
 
+bool bpf_map_meta_equal(const struct bpf_map *meta0,
+			const struct bpf_map *meta1);
+
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
 /* function argument constraints */
-- 
cgit v1.2.3


From 316cdaa1158af17250397054f92bb339fbd8e282 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Wed, 26 Aug 2020 09:05:35 -0700
Subject: net: add option to not create fall-back tunnels in root-ns as well

The sysctl that was added  earlier by commit 79134e6ce2c ("net: do
not create fallback tunnels for non-default namespaces") to create
fall-back only in root-ns. This patch enhances that behavior to provide
option not to create fallback tunnels in root-ns as well. Since modules
that create fallback tunnels could be built-in and setting the sysctl
value after booting is pointless, so added a kernel cmdline options to
change this default. The default setting is preseved for backward
compatibility. The kernel command line option of fb_tunnels=initns will
set the sysctl value to 1 and will create fallback tunnels only in initns
while kernel cmdline fb_tunnels=none will set the sysctl value to 2 and
fallback tunnels are skipped in every netns.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Maciej Zenczykowski <maze@google.com>
Cc: Jian Yang <jianyang@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b8abe1d7aa0b..c0b512e6a02b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -640,10 +640,14 @@ struct netdev_queue {
 extern int sysctl_fb_tunnels_only_for_init_net;
 extern int sysctl_devconf_inherit_init_net;
 
+/*
+ * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns
+ *                                     == 1 : For initns only
+ *                                     == 2 : For none.
+ */
 static inline bool net_has_fallback_tunnels(const struct net *net)
 {
-	return net == &init_net ||
-	       !IS_ENABLED(CONFIG_SYSCTL) ||
+	return (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1) ||
 	       !sysctl_fb_tunnels_only_for_init_net;
 }
 
-- 
cgit v1.2.3


From 7a81575b806e5dab214025e6757362c62d946405 Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Thu, 20 Aug 2020 10:19:01 +0200
Subject: netfilter: nf_tables: add userdata attributes to nft_table

Enables storing userdata for nft_table. Field udata points to user data
and udlen store its length.

Adds new attribute flag NFTA_TABLE_USERDATA

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 2 ++
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index bf9491b77d16..97a7e147a59a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1080,6 +1080,8 @@ struct nft_table {
 					flags:8,
 					genmask:2;
 	char				*name;
+	u16				udlen;
+	u8				*udata;
 };
 
 void nft_register_chain_type(const struct nft_chain_type *);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 42f351c1f5c5..aeb88cbd303e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -172,6 +172,7 @@ enum nft_table_flags {
  * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
  * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
  * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
+ * @NFTA_TABLE_USERDATA: user data (NLA_BINARY)
  */
 enum nft_table_attributes {
 	NFTA_TABLE_UNSPEC,
@@ -180,6 +181,7 @@ enum nft_table_attributes {
 	NFTA_TABLE_USE,
 	NFTA_TABLE_HANDLE,
 	NFTA_TABLE_PAD,
+	NFTA_TABLE_USERDATA,
 	__NFTA_TABLE_MAX
 };
 #define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
-- 
cgit v1.2.3


From 4afc41dfa5a716e9e7a90c22972583f337c0bcbf Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Aug 2020 00:52:43 +0200
Subject: netfilter: conntrack: remove ignore stats

This counter increments when nf_conntrack_in sees a packet that already
has a conntrack attached or when the packet is marked as UNTRACKED.
Neither is an error.

The former is normal for loopback traffic.  The second happens for
certain ICMPv6 packets or when nftables/ip(6)tables rules are in place.

In case someone needs to count UNTRACKED packets, or packets
that are marked as untracked before conntrack_in this can be done with
both nftables and ip(6)tables rules.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h      | 1 -
 include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 1db83c931d9c..96b90d7e361f 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -8,7 +8,6 @@
 struct ip_conntrack_stat {
 	unsigned int found;
 	unsigned int invalid;
-	unsigned int ignore;
 	unsigned int insert;
 	unsigned int insert_failed;
 	unsigned int drop;
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 262881792671..3e471558da82 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -247,7 +247,7 @@ enum ctattr_stats_cpu {
 	CTA_STATS_FOUND,
 	CTA_STATS_NEW,		/* no longer used */
 	CTA_STATS_INVALID,
-	CTA_STATS_IGNORE,
+	CTA_STATS_IGNORE,	/* no longer used */
 	CTA_STATS_DELETE,	/* no longer used */
 	CTA_STATS_DELETE_LIST,	/* no longer used */
 	CTA_STATS_INSERT,
-- 
cgit v1.2.3


From bc92470413f3af152db0d8f90ef3eb13f8cc417a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Aug 2020 00:52:44 +0200
Subject: netfilter: conntrack: add clash resolution stat counter

There is a misconception about what "insert_failed" means.

We increment this even when a clash got resolved, so it might not indicate
a problem.

Add a dedicated counter for clash resolution and only increment
insert_failed if a clash cannot be resolved.

For the old /proc interface, export this in place of an older stat
that got removed a while back.
For ctnetlink, export this with a new attribute.

Also correct an outdated comment that implies we add a duplicate tuple --
we only add the (unique) reply direction.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h      | 1 +
 include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 96b90d7e361f..0c7d8d1e945d 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -10,6 +10,7 @@ struct ip_conntrack_stat {
 	unsigned int invalid;
 	unsigned int insert;
 	unsigned int insert_failed;
+	unsigned int clash_resolve;
 	unsigned int drop;
 	unsigned int early_drop;
 	unsigned int error;
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 3e471558da82..d8484be72fdc 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -256,6 +256,7 @@ enum ctattr_stats_cpu {
 	CTA_STATS_EARLY_DROP,
 	CTA_STATS_ERROR,
 	CTA_STATS_SEARCH_RESTART,
+	CTA_STATS_CLASH_RESOLVE,
 	__CTA_STATS_MAX,
 };
 #define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
-- 
cgit v1.2.3


From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 27 Aug 2020 15:01:11 -0700
Subject: bpf: Introduce sleepable BPF programs

Introduce sleepable BPF programs that can request such property for themselves
via BPF_F_SLEEPABLE flag at program load time. In such case they will be able
to use helpers like bpf_copy_from_user() that might sleep. At present only
fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only
when they are attached to kernel functions that are known to allow sleeping.

The non-sleepable programs are relying on implicit rcu_read_lock() and
migrate_disable() to protect life time of programs, maps that they use and
per-cpu kernel structures used to pass info between bpf programs and the
kernel. The sleepable programs cannot be enclosed into rcu_read_lock().
migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs
should not be enclosed in migrate_disable() as well. Therefore
rcu_read_lock_trace is used to protect the life time of sleepable progs.

There are many networking and tracing program types. In many cases the
'struct bpf_prog *' pointer itself is rcu protected within some other kernel
data structure and the kernel code is using rcu_dereference() to load that
program pointer and call BPF_PROG_RUN() on it. All these cases are not touched.
Instead sleepable bpf programs are allowed with bpf trampoline only. The
program pointers are hard-coded into generated assembly of bpf trampoline and
synchronize_rcu_tasks_trace() is used to protect the life time of the program.
The same trampoline can hold both sleepable and non-sleepable progs.

When rcu_read_lock_trace is held it means that some sleepable bpf program is
running from bpf trampoline. Those programs can use bpf arrays and preallocated
hash/lru maps. These map types are waiting on programs to complete via
synchronize_rcu_tasks_trace();

Updates to trampoline now has to do synchronize_rcu_tasks_trace() and
synchronize_rcu_tasks() to wait for sleepable progs to finish and for
trampoline assembly to finish.

This is the first step of introducing sleepable progs. Eventually dynamically
allocated hash maps can be allowed and networking program types can become
sleepable too.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h      | 3 +++
 include/uapi/linux/bpf.h | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dbba82a80087..4dd7e927621d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 /* these two functions are called from generated trampoline */
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+void notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(void);
 
 struct bpf_ksym {
 	unsigned long		 start;
@@ -734,6 +736,7 @@ struct bpf_prog_aux {
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
+	bool sleepable;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef7af384f5ee..6e8b706aeb05 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -346,6 +346,14 @@ enum bpf_link_type {
 /* The verifier internal test flag. Behavior is undefined */
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)
 
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE		(1U << 4)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
-- 
cgit v1.2.3


From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 27 Aug 2020 15:01:12 -0700
Subject: bpf: Add bpf_copy_from_user() helper.

Sleepable BPF programs can now use copy_from_user() to access user memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h      | 1 +
 include/uapi/linux/bpf.h | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4dd7e927621d..c6d9f2c444f4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1784,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6e8b706aeb05..a613750d5515 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3569,6 +3569,13 @@ union bpf_attr {
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
  *		value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * 	Description
+ * 		Read *size* bytes from user space address *user_ptr* and store
+ * 		the data in *dst*. This is a wrapper of copy_from_user().
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3719,6 +3726,7 @@ union bpf_attr {
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
+	FN(copy_from_user),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 67407a406db337acdaabecd3747d160d89a929e4 Mon Sep 17 00:00:00 2001
From: Balazs Scheidler <bazsi77@gmail.com>
Date: Sat, 29 Aug 2020 08:19:15 +0200
Subject: netfilter: nft_socket: add wildcard support

Add NFT_SOCKET_WILDCARD to match to wildcard socket listener.

Signed-off-by: Balazs Scheidler <bazsi77@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index aeb88cbd303e..543dc697b796 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1010,10 +1010,12 @@ enum nft_socket_attributes {
  *
  * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
  * @NFT_SOCKET_MARK: Value of the socket mark
+ * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0)
  */
 enum nft_socket_keys {
 	NFT_SOCKET_TRANSPARENT,
 	NFT_SOCKET_MARK,
+	NFT_SOCKET_WILDCARD,
 	__NFT_SOCKET_MAX
 };
 #define NFT_SOCKET_MAX	(__NFT_SOCKET_MAX - 1)
-- 
cgit v1.2.3


From 9667305c6374df8672be46bc496f52f040999531 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 31 Aug 2020 08:51:55 -0700
Subject: bpf: Fix build without BPF_SYSCALL, but with BPF_JIT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_BPF_SYSCALL is not set, but CONFIG_BPF_JIT=y
the kernel build fails:
In file included from ../kernel/bpf/trampoline.c:11:
../kernel/bpf/trampoline.c: In function ‘bpf_trampoline_update’:
../kernel/bpf/trampoline.c:220:39: error: ‘call_rcu_tasks_trace’ undeclared
../kernel/bpf/trampoline.c: In function ‘__bpf_prog_enter_sleepable’:
../kernel/bpf/trampoline.c:411:2: error: implicit declaration of function ‘rcu_read_lock_trace’
../kernel/bpf/trampoline.c: In function ‘__bpf_prog_exit_sleepable’:
../kernel/bpf/trampoline.c:416:2: error: implicit declaration of function ‘rcu_read_unlock_trace’

This is due to:
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_JIT) += dispatcher.o
There is a number of functions that arch/x86/net/bpf_jit_comp.c is
using from these two files, but none of them will be used when
only cBPF is on (which is the case for BPF_SYSCALL=n BPF_JIT=y).

Add rcu_trace functions to rcupdate_trace.h. The JITed code won't execute them
and BPF trampoline logic won't be used without BPF_SYSCALL.

Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/bpf/20200831155155.62754-1-alexei.starovoitov@gmail.com
---
 include/linux/rcupdate_trace.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index d9015aac78c6..aaaac8ac927c 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -82,7 +82,14 @@ static inline void rcu_read_unlock_trace(void)
 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
 void synchronize_rcu_tasks_trace(void);
 void rcu_barrier_tasks_trace(void);
-
+#else
+/*
+ * The BPF JIT forms these addresses even when it doesn't call these
+ * functions, so provide definitions that result in runtime errors.
+ */
+static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) { BUG(); }
+static inline void rcu_read_lock_trace(void) { BUG(); }
+static inline void rcu_read_unlock_trace(void) { BUG(); }
 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
 
 #endif /* __LINUX_RCUPDATE_TRACE_H */
-- 
cgit v1.2.3


From 1742b3d528690ae7773cf7bf2f01a90ee1de2fe0 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:15 +0200
Subject: xsk: i40e: ice: ixgbe: mlx5: Pass buffer pool to driver instead of
 umem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the explicit umem reference passed to the driver in AF_XDP
zero-copy mode with the buffer pool instead. This in preparation for
extending the functionality of the zero-copy mode so that umems can be
shared between queues on the same netdev and also between netdevs. In
this commit, only an umem reference has been added to the buffer pool
struct. But later commits will add other entities to it. These are
going to be entities that are different between different queue ids
and netdevs even though the umem is shared between them.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-2-git-send-email-magnus.karlsson@intel.com
---
 include/linux/netdevice.h   | 10 +++++-----
 include/net/xdp_sock_drv.h  |  7 ++++---
 include/net/xsk_buff_pool.h |  4 +++-
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0e303f6603f..d088dd866825 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -618,7 +618,7 @@ struct netdev_queue {
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
 #ifdef CONFIG_XDP_SOCKETS
-	struct xdp_umem         *umem;
+	struct xsk_buff_pool    *pool;
 #endif
 /*
  * write-mostly part
@@ -751,7 +751,7 @@ struct netdev_rx_queue {
 	struct net_device		*dev;
 	struct xdp_rxq_info		xdp_rxq;
 #ifdef CONFIG_XDP_SOCKETS
-	struct xdp_umem                 *umem;
+	struct xsk_buff_pool            *pool;
 #endif
 } ____cacheline_aligned_in_smp;
 
@@ -879,7 +879,7 @@ enum bpf_netdev_command {
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_MAP_ALLOC,
 	BPF_OFFLOAD_MAP_FREE,
-	XDP_SETUP_XSK_UMEM,
+	XDP_SETUP_XSK_POOL,
 };
 
 struct bpf_prog_offload_ops;
@@ -913,9 +913,9 @@ struct netdev_bpf {
 		struct {
 			struct bpf_offloaded_map *offmap;
 		};
-		/* XDP_SETUP_XSK_UMEM */
+		/* XDP_SETUP_XSK_POOL */
 		struct {
-			struct xdp_umem *umem;
+			struct xsk_buff_pool *pool;
 			u16 queue_id;
 		} xsk;
 	};
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index ccf848f7efa4..5dc8d3c0dcd4 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -14,7 +14,8 @@
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+struct xsk_buff_pool *xdp_get_xsk_pool_from_qid(struct net_device *dev,
+						u16 queue_id);
 void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
 void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
 void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
@@ -125,8 +126,8 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 {
 }
 
-static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
-						     u16 queue_id)
+static inline struct xsk_buff_pool *
+xdp_get_xsk_pool_from_qid(struct net_device *dev, u16 queue_id)
 {
 	return NULL;
 }
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 6842990e2712..f851b0a68324 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -13,6 +13,7 @@ struct xsk_buff_pool;
 struct xdp_rxq_info;
 struct xsk_queue;
 struct xdp_desc;
+struct xdp_umem;
 struct device;
 struct page;
 
@@ -42,13 +43,14 @@ struct xsk_buff_pool {
 	u32 frame_len;
 	bool dma_need_sync;
 	bool unaligned;
+	struct xdp_umem *umem;
 	void *addrs;
 	struct device *dev;
 	struct xdp_buff_xsk *free_heads[];
 };
 
 /* AF_XDP core. */
-struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
+struct xsk_buff_pool *xp_create(struct xdp_umem *umem, u32 chunks,
 				u32 chunk_size, u32 headroom, u64 size,
 				bool unaligned);
 void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
-- 
cgit v1.2.3


From c4655761d3cf62bf5f86650e79349c1bfa5c6285 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:16 +0200
Subject: xsk: i40e: ice: ixgbe: mlx5: Rename xsk zero-copy driver interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the AF_XDP zero-copy driver interface functions to better
reflect what they do after the replacement of umems with buffer
pools in the previous commit. Mostly it is about replacing the
umem name from the function names with xsk_buff and also have
them take the a buffer pool pointer instead of a umem. The
various ring functions have also been renamed in the process so
that they have the same naming convention as the internal
functions in xsk_queue.h. This so that it will be clearer what
they do and also for consistency.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-3-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h     |   1 +
 include/net/xdp_sock_drv.h | 114 +++++++++++++++++++++++----------------------
 2 files changed, 60 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9d87cc40c11..ccf6cb54f9a6 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -52,6 +52,7 @@ struct xdp_sock {
 	struct net_device *dev;
 	struct xdp_umem *umem;
 	struct list_head flush_node;
+	struct xsk_buff_pool *pool;
 	u16 queue_id;
 	bool zc;
 	enum {
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 5dc8d3c0dcd4..a7c7d2eff860 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -11,48 +11,50 @@
 
 #ifdef CONFIG_XDP_SOCKETS
 
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xsk_buff_pool *xdp_get_xsk_pool_from_qid(struct net_device *dev,
-						u16 queue_id);
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
+bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+void xsk_tx_release(struct xsk_buff_pool *pool);
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+					    u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
+void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
+bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
 
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
 {
-	return XDP_PACKET_HEADROOM + umem->headroom;
+	return XDP_PACKET_HEADROOM + pool->headroom;
 }
 
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
 {
-	return umem->chunk_size;
+	return pool->chunk_size;
 }
 
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 {
-	return xsk_umem_get_chunk_size(umem) - xsk_umem_get_headroom(umem);
+	return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
 }
 
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
-	xp_set_rxq_info(umem->pool, rxq);
+	xp_set_rxq_info(pool, rxq);
 }
 
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
 				      unsigned long attrs)
 {
-	xp_dma_unmap(umem->pool, attrs);
+	xp_dma_unmap(pool, attrs);
 }
 
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
-				   unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+				   struct device *dev, unsigned long attrs)
 {
-	return xp_dma_map(umem->pool, dev, attrs, umem->pgs, umem->npgs);
+	struct xdp_umem *umem = pool->umem;
+
+	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
 }
 
 static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
@@ -69,14 +71,14 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
 	return xp_get_frame_dma(xskb);
 }
 
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
 {
-	return xp_alloc(umem->pool);
+	return xp_alloc(pool);
 }
 
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
 {
-	return xp_can_alloc(umem->pool, count);
+	return xp_can_alloc(pool, count);
 }
 
 static inline void xsk_buff_free(struct xdp_buff *xdp)
@@ -86,14 +88,15 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 	xp_free(xskb);
 }
 
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+					      u64 addr)
 {
-	return xp_raw_get_dma(umem->pool, addr);
+	return xp_raw_get_dma(pool, addr);
 }
 
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 {
-	return xp_raw_get_data(umem->pool, addr);
+	return xp_raw_get_data(pool, addr);
 }
 
 static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
@@ -103,83 +106,83 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
 	xp_dma_sync_for_cpu(xskb);
 }
 
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
 						    dma_addr_t dma,
 						    size_t size)
 {
-	xp_dma_sync_for_device(umem->pool, dma, size);
+	xp_dma_sync_for_device(pool, dma, size);
 }
 
 #else
 
-static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
 {
 }
 
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
-				       struct xdp_desc *desc)
+static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
+				    struct xdp_desc *desc)
 {
 	return false;
 }
 
-static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+static inline void xsk_tx_release(struct xsk_buff_pool *pool)
 {
 }
 
 static inline struct xsk_buff_pool *
-xdp_get_xsk_pool_from_qid(struct net_device *dev, u16 queue_id)
+xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id)
 {
 	return NULL;
 }
 
-static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 }
 
-static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
 {
 }
 
-static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 }
 
-static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
 {
 }
 
-static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
 {
 	return false;
 }
 
-static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
 {
 	return 0;
 }
 
-static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
 {
 	return 0;
 }
 
-static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 {
 	return 0;
 }
 
-static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
 }
 
-static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
 				      unsigned long attrs)
 {
 }
 
-static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
-				   unsigned long attrs)
+static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
+				   struct device *dev, unsigned long attrs)
 {
 	return 0;
 }
@@ -194,12 +197,12 @@ static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
 	return 0;
 }
 
-static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
 {
 	return NULL;
 }
 
-static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
 {
 	return false;
 }
@@ -208,12 +211,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 {
 }
 
-static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
+					      u64 addr)
 {
 	return 0;
 }
 
-static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 {
 	return NULL;
 }
@@ -222,7 +226,7 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
 {
 }
 
-static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
 						    dma_addr_t dma,
 						    size_t size)
 {
-- 
cgit v1.2.3


From 1c1efc2af158869795d3334a12fed2afd9c51539 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:17 +0200
Subject: xsk: Create and free buffer pool independently from umem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create and free the buffer pool independently from the umem. Move
these operations that are performed on the buffer pool from the
umem create and destroy functions to new create and destroy
functions just for the buffer pool. This so that in later commits
we can instantiate multiple buffer pools per umem when sharing a
umem between HW queues and/or devices. We also erradicate the
back pointer from the umem to the buffer pool as this will not
work when we introduce the possibility to have multiple buffer
pools per umem.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-4-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      |  3 +--
 include/net/xsk_buff_pool.h | 13 ++++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index ccf6cb54f9a6..ea2b020c4fbc 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -20,13 +20,12 @@ struct xdp_buff;
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
-	struct xsk_buff_pool *pool;
 	u64 size;
 	u32 headroom;
 	u32 chunk_size;
+	u32 chunks;
 	struct user_struct *user;
 	refcount_t users;
-	struct work_struct work;
 	struct page **pgs;
 	u32 npgs;
 	u16 queue_id;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index f851b0a68324..4025486cc057 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -14,6 +14,7 @@ struct xdp_rxq_info;
 struct xsk_queue;
 struct xdp_desc;
 struct xdp_umem;
+struct xdp_sock;
 struct device;
 struct page;
 
@@ -46,16 +47,22 @@ struct xsk_buff_pool {
 	struct xdp_umem *umem;
 	void *addrs;
 	struct device *dev;
+	refcount_t users;
+	struct work_struct work;
 	struct xdp_buff_xsk *free_heads[];
 };
 
 /* AF_XDP core. */
-struct xsk_buff_pool *xp_create(struct xdp_umem *umem, u32 chunks,
-				u32 chunk_size, u32 headroom, u64 size,
-				bool unaligned);
+struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+						struct xdp_umem *umem);
+int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+		  u16 queue_id, u16 flags);
 void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_release(struct xdp_buff_xsk *xskb);
+void xp_get_pool(struct xsk_buff_pool *pool);
+void xp_put_pool(struct xsk_buff_pool *pool);
+void xp_clear_dev(struct xsk_buff_pool *pool);
 
 /* AF_XDP, and XDP core. */
 void xp_free(struct xdp_buff_xsk *xskb);
-- 
cgit v1.2.3


From 7361f9c3d71955c624fdad5676c99fc88a8249e9 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:18 +0200
Subject: xsk: Move fill and completion rings to buffer pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the fill and completion rings from the umem to the buffer
pool. This so that we in a later commit can share the umem
between multiple HW queue ids. In this case, we need one fill and
completion ring per queue id. As the buffer pool is per queue id
and napi id this is a natural place for it and one umem
struture can be shared between these buffer pools.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-5-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      | 4 ++--
 include/net/xsk_buff_pool.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index ea2b020c4fbc..2a284e137e9a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -18,8 +18,6 @@ struct xsk_queue;
 struct xdp_buff;
 
 struct xdp_umem {
-	struct xsk_queue *fq;
-	struct xsk_queue *cq;
 	u64 size;
 	u32 headroom;
 	u32 chunk_size;
@@ -77,6 +75,8 @@ struct xdp_sock {
 	struct list_head map_list;
 	/* Protects map_list */
 	spinlock_t map_list_lock;
+	struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
+	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
 };
 
 #ifdef CONFIG_XDP_SOCKETS
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 4025486cc057..380d9aeedbea 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -30,6 +30,7 @@ struct xdp_buff_xsk {
 
 struct xsk_buff_pool {
 	struct xsk_queue *fq;
+	struct xsk_queue *cq;
 	struct list_head free_list;
 	dma_addr_t *dma_pages;
 	struct xdp_buff_xsk *heads;
@@ -57,7 +58,6 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem);
 int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 		  u16 queue_id, u16 flags);
-void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
-- 
cgit v1.2.3


From c2d3d6a474629e30428b1622af3d551f560cd1d8 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:19 +0200
Subject: xsk: Move queue_id, dev and need_wakeup to buffer pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move queue_id, dev, and need_wakeup from the umem to the
buffer pool. This so that we in a later commit can share the umem
between multiple HW queues. There is one buffer pool per dev and
queue id, so these variables should belong to the buffer pool, not
the umem. Need_wakeup is also something that is set on a per napi
level, so there is usually one per device and queue id. So move
this to the buffer pool too.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-6-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      | 3 ---
 include/net/xsk_buff_pool.h | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 2a284e137e9a..b052f1c005a9 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -26,11 +26,8 @@ struct xdp_umem {
 	refcount_t users;
 	struct page **pgs;
 	u32 npgs;
-	u16 queue_id;
-	u8 need_wakeup;
 	u8 flags;
 	int id;
-	struct net_device *dev;
 	bool zc;
 	spinlock_t xsk_tx_list_lock;
 	struct list_head xsk_tx_list;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 380d9aeedbea..2d948905f05f 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -43,11 +43,15 @@ struct xsk_buff_pool {
 	u32 headroom;
 	u32 chunk_size;
 	u32 frame_len;
+	u16 queue_id;
+	u8 cached_need_wakeup;
+	bool uses_need_wakeup;
 	bool dma_need_sync;
 	bool unaligned;
 	struct xdp_umem *umem;
 	void *addrs;
 	struct device *dev;
+	struct net_device *netdev;
 	refcount_t users;
 	struct work_struct work;
 	struct xdp_buff_xsk *free_heads[];
-- 
cgit v1.2.3


From a5aa8e529e3667eb377ec132d4b4926dee065a45 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:20 +0200
Subject: xsk: Move xsk_tx_list and its lock to buffer pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the xsk_tx_list and the xsk_tx_list_lock from the umem to
the buffer pool. This so that we in a later commit can share the
umem between multiple HW queues. There is one xsk_tx_list per
device and queue id, so it should be located in the buffer pool.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-7-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      | 4 +---
 include/net/xsk_buff_pool.h | 5 +++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index b052f1c005a9..9a61d05ec132 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -29,8 +29,6 @@ struct xdp_umem {
 	u8 flags;
 	int id;
 	bool zc;
-	spinlock_t xsk_tx_list_lock;
-	struct list_head xsk_tx_list;
 };
 
 struct xsk_map {
@@ -57,7 +55,7 @@ struct xdp_sock {
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
-	struct list_head list;
+	struct list_head tx_list;
 	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
 	 * in the SKB destructor callback.
 	 */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 2d948905f05f..83f100c6d440 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -52,6 +52,9 @@ struct xsk_buff_pool {
 	void *addrs;
 	struct device *dev;
 	struct net_device *netdev;
+	struct list_head xsk_tx_list;
+	/* Protects modifications to the xsk_tx_list */
+	spinlock_t xsk_tx_list_lock;
 	refcount_t users;
 	struct work_struct work;
 	struct xdp_buff_xsk *free_heads[];
@@ -67,6 +70,8 @@ void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
 void xp_put_pool(struct xsk_buff_pool *pool);
 void xp_clear_dev(struct xsk_buff_pool *pool);
+void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 
 /* AF_XDP, and XDP core. */
 void xp_free(struct xdp_buff_xsk *xskb);
-- 
cgit v1.2.3


From 7f7ffa4e9c38f01d380ed9df6adb238fd5e6eea5 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:21 +0200
Subject: xsk: Move addrs from buffer pool to umem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replicate the addrs pointer in the buffer pool to the umem. This mapping
will be the same for all buffer pools sharing the same umem. In the
buffer pool we leave the addrs pointer for performance reasons.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-8-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9a61d05ec132..126d24364b5a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -18,6 +18,7 @@ struct xsk_queue;
 struct xdp_buff;
 
 struct xdp_umem {
+	void *addrs;
 	u64 size;
 	u32 headroom;
 	u32 chunk_size;
-- 
cgit v1.2.3


From 921b68692abb4fd02237b6875b2056bc59435116 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:22 +0200
Subject: xsk: Enable sharing of dma mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable the sharing of dma mappings by moving them out from the buffer
pool. Instead we put each dma mapped umem region in a list in the umem
structure. If dma has already been mapped for this umem and device, it
is not mapped again and the existing dma mappings are reused.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-9-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      |  1 +
 include/net/xsk_buff_pool.h | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 126d24364b5a..282aeba0d20f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
 	u8 flags;
 	int id;
 	bool zc;
+	struct list_head xsk_dma_list;
 };
 
 struct xsk_map {
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 83f100c6d440..356d0ac74eba 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -28,10 +28,23 @@ struct xdp_buff_xsk {
 	struct list_head free_list_node;
 };
 
+struct xsk_dma_map {
+	dma_addr_t *dma_pages;
+	struct device *dev;
+	struct net_device *netdev;
+	refcount_t users;
+	struct list_head list; /* Protected by the RTNL_LOCK */
+	u32 dma_pages_cnt;
+	bool dma_need_sync;
+};
+
 struct xsk_buff_pool {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
 	struct list_head free_list;
+	/* For performance reasons, each buff pool has its own array of dma_pages
+	 * even when they are identical.
+	 */
 	dma_addr_t *dma_pages;
 	struct xdp_buff_xsk *heads;
 	u64 chunk_mask;
-- 
cgit v1.2.3


From 8ef4e27eb3f03edfbfbe5657b8061f2a47757037 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:23 +0200
Subject: xsk: Rearrange internal structs for better performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rearrange the xdp_sock, xdp_umem and xsk_buff_pool structures so
that they get smaller and align better to the cache lines. In the
previous commits of this patch set, these structs have been
reordered with the focus on functionality and simplicity, not
performance. This patch improves throughput performance by around
3%.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-10-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h      | 13 +++++++------
 include/net/xsk_buff_pool.h | 27 +++++++++++++++------------
 2 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 282aeba0d20f..1a9559c0cbdd 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -23,13 +23,13 @@ struct xdp_umem {
 	u32 headroom;
 	u32 chunk_size;
 	u32 chunks;
+	u32 npgs;
 	struct user_struct *user;
 	refcount_t users;
-	struct page **pgs;
-	u32 npgs;
 	u8 flags;
-	int id;
 	bool zc;
+	struct page **pgs;
+	int id;
 	struct list_head xsk_dma_list;
 };
 
@@ -42,7 +42,7 @@ struct xsk_map {
 struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
-	struct xsk_queue *rx;
+	struct xsk_queue *rx ____cacheline_aligned_in_smp;
 	struct net_device *dev;
 	struct xdp_umem *umem;
 	struct list_head flush_node;
@@ -54,8 +54,7 @@ struct xdp_sock {
 		XSK_BOUND,
 		XSK_UNBOUND,
 	} state;
-	/* Protects multiple processes in the control path */
-	struct mutex mutex;
+
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head tx_list;
 	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
@@ -72,6 +71,8 @@ struct xdp_sock {
 	struct list_head map_list;
 	/* Protects map_list */
 	spinlock_t map_list_lock;
+	/* Protects multiple processes in the control path */
+	struct mutex mutex;
 	struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
 	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
 };
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 356d0ac74eba..38d03a64c9ea 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -39,9 +39,22 @@ struct xsk_dma_map {
 };
 
 struct xsk_buff_pool {
-	struct xsk_queue *fq;
-	struct xsk_queue *cq;
+	/* Members only used in the control path first. */
+	struct device *dev;
+	struct net_device *netdev;
+	struct list_head xsk_tx_list;
+	/* Protects modifications to the xsk_tx_list */
+	spinlock_t xsk_tx_list_lock;
+	refcount_t users;
+	struct xdp_umem *umem;
+	struct work_struct work;
 	struct list_head free_list;
+	u32 heads_cnt;
+	u16 queue_id;
+
+	/* Data path members as close to free_heads at the end as possible. */
+	struct xsk_queue *fq ____cacheline_aligned_in_smp;
+	struct xsk_queue *cq;
 	/* For performance reasons, each buff pool has its own array of dma_pages
 	 * even when they are identical.
 	 */
@@ -51,25 +64,15 @@ struct xsk_buff_pool {
 	u64 addrs_cnt;
 	u32 free_list_cnt;
 	u32 dma_pages_cnt;
-	u32 heads_cnt;
 	u32 free_heads_cnt;
 	u32 headroom;
 	u32 chunk_size;
 	u32 frame_len;
-	u16 queue_id;
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
 	bool dma_need_sync;
 	bool unaligned;
-	struct xdp_umem *umem;
 	void *addrs;
-	struct device *dev;
-	struct net_device *netdev;
-	struct list_head xsk_tx_list;
-	/* Protects modifications to the xsk_tx_list */
-	spinlock_t xsk_tx_list_lock;
-	refcount_t users;
-	struct work_struct work;
 	struct xdp_buff_xsk *free_heads[];
 };
 
-- 
cgit v1.2.3


From 9647c57b11e563f5b33a49ef72b347753917c21c Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:24 +0200
Subject: xsk: i40e: ice: ixgbe: mlx5: Test for dma_need_sync earlier for
 better performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test for dma_need_sync earlier to increase
performance. xsk_buff_dma_sync_for_cpu() takes an xdp_buff as
parameter and from that the xsk_buff_pool reference is dug out. Perf
shows that this dereference causes a lot of cache misses. But as the
buffer pool is now sent down to the driver at zero-copy initialization
time, we might as well use this pointer directly, instead of going via
the xsk_buff and we can do so already in xsk_buff_dma_sync_for_cpu()
instead of in xp_dma_sync_for_cpu. This gets rid of these cache
misses.

Throughput increases with 3% for the xdpsock l2fwd sample application
on my machine.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-11-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock_drv.h  | 7 +++++--
 include/net/xsk_buff_pool.h | 3 ---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index a7c7d2eff860..5b1ee8a9976d 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -99,10 +99,13 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return xp_raw_get_data(pool, addr);
 }
 
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 
+	if (!pool->dma_need_sync)
+		return;
+
 	xp_dma_sync_for_cpu(xskb);
 }
 
@@ -222,7 +225,7 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return NULL;
 }
 
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 }
 
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 38d03a64c9ea..907537dddcac 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -114,9 +114,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
 static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
 {
-	if (!xskb->pool->dma_need_sync)
-		return;
-
 	xp_dma_sync_for_cpu_slow(xskb);
 }
 
-- 
cgit v1.2.3


From b5aea28dca13456c1a08b9b2ef8a8b92598ac426 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 28 Aug 2020 10:26:25 +0200
Subject: xsk: Add shared umem support between queue ids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support to share a umem between queue ids on the same
device. This mode can be invoked with the XDP_SHARED_UMEM bind
flag. Previously, sharing was only supported within the same
queue id and device, and you shared one set of fill and
completion rings. However, note that when sharing a umem between
queue ids, you need to create a fill ring and a completion ring
and tie them to the socket before you do the bind with the
XDP_SHARED_UMEM flag. This so that the single-producer
single-consumer semantics can be upheld.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-12-git-send-email-magnus.karlsson@intel.com
---
 include/net/xsk_buff_pool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 907537dddcac..0140d086dc84 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -81,6 +81,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem);
 int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 		  u16 queue_id, u16 flags);
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+			 struct net_device *dev, u16 queue_id);
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
-- 
cgit v1.2.3


From 1d97898b36bab91e8ffb38a660cc40eaba613f88 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Fri, 28 Aug 2020 23:14:31 +0800
Subject: ipv6: add ipv6_fragment hook in ipv6_stub

Add ipv6_fragment to ipv6_stub to avoid calling netfilter when
access ip6_fragment.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6_stubs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index d7a7f7c81e7b..8fce558b5fea 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -63,6 +63,9 @@ struct ipv6_stub {
 			       int encap_type);
 #endif
 	struct neigh_table *nd_tbl;
+
+	int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+			     int (*output)(struct net *, struct sock *, struct sk_buff *));
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
 
-- 
cgit v1.2.3


From 5af68891dc16e1c8216705034a5e0144fd47779a Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Sat, 29 Aug 2020 05:21:30 -0400
Subject: net: clean up codestyle

This is a pure codestyle cleanup patch. No functional change intended.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h  | 2 +-
 include/net/sock.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 6ae2e625050d..8ea8812b0b41 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -214,7 +214,7 @@ dst_allfrag(const struct dst_entry *dst)
 static inline int
 dst_metric_locked(const struct dst_entry *dst, int metric)
 {
-	return dst_metric(dst, RTAX_LOCK) & (1<<metric);
+	return dst_metric(dst, RTAX_LOCK) & (1 << metric);
 }
 
 static inline void dst_hold(struct dst_entry *dst)
diff --git a/include/net/sock.h b/include/net/sock.h
index 064637d1ddf6..b943731fa879 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1478,7 +1478,7 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
 {
 	if (!sk_has_account(sk))
 		return true;
-	return size<= sk->sk_forward_alloc ||
+	return size <= sk->sk_forward_alloc ||
 		__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
 		skb_pfmemalloc(skb);
 }
-- 
cgit v1.2.3


From afd6220999d4932616322f8a893371f8d0567a2a Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sun, 30 Aug 2020 11:33:58 +0300
Subject: net: phylink: add helper function to decode USXGMII word

With the new addition of the USXGMII link partner ability constants we
can now introduce a phylink helper that decodes the USXGMII word and
populates the appropriate fields in the phylink_link_state structure
based on them.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index c36fb41a7d90..d81a714cfbbd 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -490,4 +490,7 @@ void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs);
 
 void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
 				   struct phylink_link_state *state);
+
+void phylink_decode_usxgmii_word(struct phylink_link_state *state,
+				 uint16_t lpa);
 #endif
-- 
cgit v1.2.3


From 2dab432c5ae4f9c8eab3132ac0e9facf498ead92 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sun, 30 Aug 2020 11:34:00 +0300
Subject: net: mdiobus: add clause 45 mdiobus write accessor

Add the locked variant of the clause 45 mdiobus write accessor -
mdiobus_c45_write().

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 898cbf00332a..3a88b699b758 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -358,6 +358,12 @@ static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad,
 	return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum));
 }
 
+static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad,
+				    u16 regnum, u16 val)
+{
+	return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val);
+}
+
 int mdiobus_register_device(struct mdio_device *mdiodev);
 int mdiobus_unregister_device(struct mdio_device *mdiodev);
 bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
-- 
cgit v1.2.3


From 0da4c3d393e40e41e3c6b9f1cebaa498512c2abb Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sun, 30 Aug 2020 11:34:01 +0300
Subject: net: phy: add Lynx PCS module

Add a Lynx PCS module which exposes the necessary operations to drive
the PCS using phylink.

The majority of the code is extracted from the Felix DSA driver, which
will be also modified in a later patch, and exposed as a separate module
for code reusability purposes.
As such, this aims at feature and bug parity with the existing Felix DSA
driver, and thus USXGMII, SGMII, QSGMII and 2500Base-X (only w/o in-band
AN) are supported by the Lynx PCS module since these were also supported
by Felix.

The module can only be enabled by the drivers in need and not user
selectable.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs-lynx.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 include/linux/pcs-lynx.h

(limited to 'include')

diff --git a/include/linux/pcs-lynx.h b/include/linux/pcs-lynx.h
new file mode 100644
index 000000000000..a6440d6ebe95
--- /dev/null
+++ b/include/linux/pcs-lynx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2020 NXP
+ * Lynx PCS helpers
+ */
+
+#ifndef __LINUX_PCS_LYNX_H
+#define __LINUX_PCS_LYNX_H
+
+#include <linux/mdio.h>
+#include <linux/phylink.h>
+
+struct lynx_pcs {
+	struct phylink_pcs pcs;
+	struct mdio_device *mdio;
+};
+
+struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio);
+
+void lynx_pcs_destroy(struct lynx_pcs *pcs);
+
+#endif /* __LINUX_PCS_LYNX_H */
-- 
cgit v1.2.3


From 3f7d820bad6cace3ecb859bf3fa5bc56fe8bb1c6 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 31 Aug 2020 02:26:10 -0400
Subject: net: ipv6: remove unused arg exact_dif in compute_score

The arg exact_dif is not used anymore, remove it. inet6_exact_dif_match()
is no longer needed after the above is removed, remove it too.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index bac8f4fffbd6..dda61d150a13 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -177,17 +177,6 @@ static inline int inet6_sdif(const struct sk_buff *skb)
 	return 0;
 }
 
-/* can not be used in TCP layer after tcp_v6_fill_cb */
-static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if defined(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
-		return true;
-#endif
-	return false;
-}
-
 struct tcp6_request_sock {
 	struct tcp_request_sock	  tcp6rsk_tcp;
 };
-- 
cgit v1.2.3


From 34e1ec319e99322bfed02767d51f4998a961d205 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 31 Aug 2020 02:26:34 -0400
Subject: net: ipv4: remove unused arg exact_dif in compute_score

The arg exact_dif is not used anymore, remove it. inet_exact_dif_match()
is no longer needed after the above is removed, so remove it too.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab6c7510b5b..0d11db6436c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -941,16 +941,6 @@ INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
 
 #endif
 
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
-		return true;
-#endif
-	return false;
-}
-
 /* TCP_SKB_CB reference means this can not be used from early demux */
 static inline int tcp_v4_sdif(struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 144b0a0e608690d46e9a77819249bdd8d23bdcb6 Mon Sep 17 00:00:00 2001
From: Yaroslav Bolyukin <iam@lach.pw>
Date: Sat, 29 Aug 2020 18:59:53 +0500
Subject: ipvs: remove dependency on ip6_tables

This dependency was added because ipv6_find_hdr was in iptables specific
code but is no longer required

Fixes: f8f626754ebe ("ipv6: Move ipv6_find_hdr() out of Netfilter code.")
Fixes: 63dca2c0b0e7 ("ipvs: Fix faulty IPv6 extension header handling in IPVS")
Signed-off-by: Yaroslav Bolyukin <iam@lach.pw>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9a59a33787cb..d609e957a3ec 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -25,9 +25,6 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>			/* for struct ipv6hdr */
 #include <net/ipv6.h>
-#if IS_ENABLED(CONFIG_IP_VS_IPV6)
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#endif
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack.h>
 #endif
-- 
cgit v1.2.3


From c1077616142907bb6ee987ecd136d6857ffd8787 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 1 Sep 2020 15:10:08 -0700
Subject: ip: expose inet sockopts through inet_diag

Expose all exisiting inet sockopt bits through inet_diag for debug purpose.
Corresponding changes in iproute2 ss will be submitted to output all
these values.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h      |  2 ++
 include/uapi/linux/inet_diag.h | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0ef2d800fda7..84abb30a3fbb 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -75,6 +75,8 @@ static inline size_t inet_diag_msg_attrs_size(void)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 		+ nla_total_size_64bit(sizeof(u64))  /* INET_DIAG_CGROUP_ID */
 #endif
+		+ nla_total_size(sizeof(struct inet_diag_sockopt))
+						     /* INET_DIAG_SOCKOPT */
 		;
 }
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 5ba122c1949a..20ee93f0f876 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -160,6 +160,7 @@ enum {
 	INET_DIAG_ULP_INFO,
 	INET_DIAG_SK_BPF_STORAGES,
 	INET_DIAG_CGROUP_ID,
+	INET_DIAG_SOCKOPT,
 	__INET_DIAG_MAX,
 };
 
@@ -183,6 +184,23 @@ struct inet_diag_meminfo {
 	__u32	idiag_tmem;
 };
 
+/* INET_DIAG_SOCKOPT */
+
+struct inet_diag_sockopt {
+	__u8	recverr:1,
+		is_icsk:1,
+		freebind:1,
+		hdrincl:1,
+		mc_loop:1,
+		transparent:1,
+		mc_all:1,
+		nodefrag:1;
+	__u8	bind_address_no_port:1,
+		recverr_rfc4884:1,
+		defer_connect:1,
+		unused:5;
+};
+
 /* INET_DIAG_VEGASINFO */
 
 struct tcpvegas_info {
-- 
cgit v1.2.3


From 0f7c5317b89058e432d3e97efa19467ff4c3b86b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 4 Sep 2020 14:37:29 -0700
Subject: of: Export of_remove_property() to modules

We will need to remove some OF properties in drivers/net/dsa/bcm_sf2.c
with a subsequent commit. Export of_remove_property() to modules so we
can keep bcm_sf2 modular and provide an empty stub for when CONFIG_OF is
disabled to maintain the ability to compile test.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/of.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 5cf7ae0465d1..481ec0467285 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -929,6 +929,11 @@ static inline int of_machine_is_compatible(const char *compat)
 	return 0;
 }
 
+static inline int of_remove_property(struct device_node *np, struct property *prop)
+{
+	return 0;
+}
+
 static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
 {
 	return false;
-- 
cgit v1.2.3


From 938c3efd9e650ca343d04e70d11a17c64119e17c Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 4 Sep 2020 17:14:53 +0100
Subject: bpf: Fix formatting in documentation for BPF helpers

Fix a formatting error in the description of bpf_load_hdr_opt() (rst2man
complains about a wrong indentation, but what is missing is actually a
blank line before the bullet list).

Fix and harmonise the formatting for other helpers.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200904161454.31135-3-quentin@isovalent.com
---
 include/uapi/linux/bpf.h | 87 +++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8dda13880957..90359cab501d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3349,38 +3349,38 @@ union bpf_attr {
  *	Description
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
  *	Description
  *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
  *	Description
  *		Return a user or a kernel stack in bpf program provided buffer.
  *		To achieve this, the helper needs *task*, which is a valid
- *		pointer to struct task_struct. To store the stacktrace, the
- *		bpf program provides *buf* with	a nonnegative *size*.
+ *		pointer to **struct task_struct**. To store the stacktrace, the
+ *		bpf program provides *buf* with a nonnegative *size*.
  *
  *		The last argument, *flags*, holds the number of stack frames to
  *		skip (from 0 to 255), masked with
@@ -3410,12 +3410,12 @@ union bpf_attr {
  * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
  *	Description
  *		Load header option.  Support reading a particular TCP header
- *		option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *		option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
  *
  *		If *flags* is 0, it will search the option from the
- *		sock_ops->skb_data.  The comment in "struct bpf_sock_ops"
+ *		*skops*\ **->skb_data**.  The comment in **struct bpf_sock_ops**
  *		has details on what skb_data contains under different
- *		sock_ops->op.
+ *		*skops*\ **->op**.
  *
  *		The first byte of the *searchby_res* specifies the
  *		kind that it wants to search.
@@ -3435,7 +3435,7 @@ union bpf_attr {
  *		[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
  *
  *		To search for the standard window scale option (3),
- *		the searchby_res should be [ 3, 0, 0, .... 0 ].
+ *		the *searchby_res* should be [ 3, 0, 0, .... 0 ].
  *		Note, kind-length must be 0 for regular option.
  *
  *		Searching for No-Op (0) and End-of-Option-List (1) are
@@ -3445,27 +3445,30 @@ union bpf_attr {
  *		of a header option.
  *
  *		Supported flags:
+ *
  *		* **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
  *		  saved_syn packet or the just-received syn packet.
  *
  *	Return
- *		>0 when found, the header option is copied to *searchby_res*.
- *		The return value is the total length copied.
+ *		> 0 when found, the header option is copied to *searchby_res*.
+ *		The return value is the total length copied. On failure, a
+ *		negative error code is returned:
  *
- *		**-EINVAL** If param is invalid
+ *		**-EINVAL** if a parameter is invalid.
  *
- *		**-ENOMSG** The option is not found
+ *		**-ENOMSG** if the option is not found.
  *
- *		**-ENOENT** No syn packet available when
- *			    **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *		**-ENOENT** if no syn packet is available when
+ *		**BPF_LOAD_HDR_OPT_TCP_SYN** is used.
  *
- *		**-ENOSPC** Not enough space.  Only *len* number of
- *			    bytes are copied.
+ *		**-ENOSPC** if there is not enough space.  Only *len* number of
+ *		bytes are copied.
  *
- *		**-EFAULT** Cannot parse the header options in the packet
+ *		**-EFAULT** on failure to parse the header options in the
+ *		packet.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
  *	Description
@@ -3483,44 +3486,44 @@ union bpf_attr {
  *		by searching the same option in the outgoing skb.
  *
  *		This helper can only be called during
- *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *		**BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
  *
  *	Return
  *		0 on success, or negative error in case of failure:
  *
- *		**-EINVAL** If param is invalid
+ *		**-EINVAL** If param is invalid.
  *
- *		**-ENOSPC** Not enough space in the header.
- *			    Nothing has been written
+ *		**-ENOSPC** if there is not enough space in the header.
+ *		Nothing has been written
  *
- *		**-EEXIST** The option has already existed
+ *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** Cannot parse the existing header options
+ *		**-EFAULT** on failrue to parse the existing header options.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
  *	Description
  *		Reserve *len* bytes for the bpf header option.  The
- *		space will be used by bpf_store_hdr_opt() later in
- *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *		space will be used by **bpf_store_hdr_opt**\ () later in
+ *		**BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
  *
- *		If bpf_reserve_hdr_opt() is called multiple times,
+ *		If **bpf_reserve_hdr_opt**\ () is called multiple times,
  *		the total number of bytes will be reserved.
  *
  *		This helper can only be called during
- *		BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *		**BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
  *
  *	Return
  *		0 on success, or negative error in case of failure:
  *
- *		**-EINVAL** if param is invalid
+ *		**-EINVAL** if a parameter is invalid.
  *
- *		**-ENOSPC** Not enough space in the header.
+ *		**-ENOSPC** if there is not enough space in the header.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
  *	Description
@@ -3560,9 +3563,9 @@ union bpf_attr {
  *
  * long bpf_d_path(struct path *path, char *buf, u32 sz)
  *	Description
- *		Return full path for given 'struct path' object, which
- *		needs to be the kernel BTF 'path' object. The path is
- *		returned in the provided buffer 'buf' of size 'sz' and
+ *		Return full path for given **struct path** object, which
+ *		needs to be the kernel BTF *path* object. The path is
+ *		returned in the provided buffer *buf* of size *sz* and
  *		is zero terminated.
  *
  *	Return
@@ -3573,7 +3576,7 @@ union bpf_attr {
  * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
  * 	Description
  * 		Read *size* bytes from user space address *user_ptr* and store
- * 		the data in *dst*. This is a wrapper of copy_from_user().
+ * 		the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  */
-- 
cgit v1.2.3


From 5205e919c9f0c5b48678f2c787871c96f665ca1b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 7 Sep 2020 12:56:08 +0300
Subject: net: bridge: mcast: add support for src list and filter mode dumping

Support per port group src list (address and timer) and filter mode
dumping. Protected by either multicast_lock or rcu.

v3: add IPv6 support
v2: require RCU or multicast_lock to traverse src groups

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c1227aecd38f..75a2ac479247 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -455,10 +455,31 @@ enum {
 enum {
 	MDBA_MDB_EATTR_UNSPEC,
 	MDBA_MDB_EATTR_TIMER,
+	MDBA_MDB_EATTR_SRC_LIST,
+	MDBA_MDB_EATTR_GROUP_MODE,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
 
+/* per mdb entry source */
+enum {
+	MDBA_MDB_SRCLIST_UNSPEC,
+	MDBA_MDB_SRCLIST_ENTRY,
+	__MDBA_MDB_SRCLIST_MAX
+};
+#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1)
+
+/* per mdb entry per source attributes
+ * these are embedded in MDBA_MDB_SRCLIST_ENTRY
+ */
+enum {
+	MDBA_MDB_SRCATTR_UNSPEC,
+	MDBA_MDB_SRCATTR_ADDRESS,
+	MDBA_MDB_SRCATTR_TIMER,
+	__MDBA_MDB_SRCATTR_MAX
+};
+#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1)
+
 /* multicast router types */
 enum {
 	MDB_RTR_TYPE_DISABLED,
-- 
cgit v1.2.3


From 0db0c34cfbc9838c1a14cb04dd880602abd699a7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 3 Sep 2020 16:14:31 -0700
Subject: net: tighten the definition of interface statistics

This patch is born out of an investigation into which IEEE statistics
correspond to which struct rtnl_link_stats64 members. Turns out that
there seems to be reasonable consensus on the matter, among many drivers.
To save others the time (and it took more time than I'm comfortable
admitting) I'm adding comments referring to IEEE attributes to
struct rtnl_link_stats64.

Up until now we had two forms of documentation for stats - in
Documentation/ABI/testing/sysfs-class-net-statistics and the comments
on struct rtnl_link_stats64 itself. While the former is very cautious
in defining the expected behavior, the latter feel quite dated and
may not be easy to understand for modern day driver author
(e.g. rx_over_errors). At the same time modern systems are far more
complex and once obvious definitions lost their clarity. For example
- does rx_packet count at the MAC layer (aFramesReceivedOK)?
packets processed correctly by hardware? received by the driver?
or maybe received by the stack?

I tried to clarify the expectations, further clarifications from
others are very welcome.

The part hardest to untangle is rx_over_errors vs rx_fifo_errors
vs rx_missed_errors. After much deliberation I concluded that for
modern HW only two of the counters will make sense. The distinction
between internal FIFO overflow and packets dropped due to back-pressure
from the host is likely too implementation (driver and device) specific
to expose in the standard stats.

Now - which two of those counters we select to use is anyone's pick:

sysfs documentation suggests rx_over_errors counts packets which
did not fit into buffers due to MTU being too small, which I reused.
There don't seem to be many modern drivers using it (well, CAN drivers
seem to love this statistic).

Of the remaining two I picked rx_missed_errors to report device drops.
bnxt reports it and it's folded into "drop"s in procfs (while
rx_fifo_errors is an error, and modern devices usually receive the frame
OK, they just can't admit it into the pipeline).

Of the drivers I looked at only AMD Lance-like and NS8390-like use all
three of these counters. rx_missed_errors counts missed frames,
rx_over_errors counts overflow events, and rx_fifo_errors counts frames
which were truncated because they didn't fit into buffers. This suggests
that rx_fifo_errors may be the correct stat for truncated packets, but
I'd think a FIFO stat counting truncated packets would be very confusing
to a modern reader.

v2:
 - add driver developer notes about ethtool stat count and reset
 - replace Ethernet with IEEE 802.3 to better indicate source of attrs
 - mention byte counters don't count FCS
 - clarify RX counter is from device to host
 - drop "sightly" from sysfs paragraph
 - add examples of ethtool stats
 - s/incoming/received/ s/incoming/transmitted/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_link.h | 204 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 187 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7fba4de511de..bf4667403cab 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -40,26 +40,197 @@ struct rtnl_link_stats {
 	__u32	rx_nohandler;		/* dropped, no handler found	*/
 };
 
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counter by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter should not include packets
+ *   dropped by the device which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is -
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or next buffer
+ *   in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistics was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistics was used interchangeably with @rx_over_errors.
+ *   Not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (can) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-	__u64	rx_packets;		/* total packets received	*/
-	__u64	tx_packets;		/* total packets transmitted	*/
-	__u64	rx_bytes;		/* total bytes received 	*/
-	__u64	tx_bytes;		/* total bytes transmitted	*/
-	__u64	rx_errors;		/* bad packets received		*/
-	__u64	tx_errors;		/* packet transmit problems	*/
-	__u64	rx_dropped;		/* no space in linux buffers	*/
-	__u64	tx_dropped;		/* no space available in linux	*/
-	__u64	multicast;		/* multicast packets received	*/
+	__u64	rx_packets;
+	__u64	tx_packets;
+	__u64	rx_bytes;
+	__u64	tx_bytes;
+	__u64	rx_errors;
+	__u64	tx_errors;
+	__u64	rx_dropped;
+	__u64	tx_dropped;
+	__u64	multicast;
 	__u64	collisions;
 
 	/* detailed rx_errors: */
 	__u64	rx_length_errors;
-	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u64	rx_frame_errors;	/* recv'd frame alignment error */
-	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u64	rx_missed_errors;	/* receiver missed packet	*/
+	__u64	rx_over_errors;
+	__u64	rx_crc_errors;
+	__u64	rx_frame_errors;
+	__u64	rx_fifo_errors;
+	__u64	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u64	tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
 	/* for cslip etc */
 	__u64	rx_compressed;
 	__u64	tx_compressed;
-
-	__u64	rx_nohandler;		/* dropped, no handler found	*/
+	__u64	rx_nohandler;
 };
 
 /* The struct should be in sync with struct ifmap */
-- 
cgit v1.2.3


From b131c96496b369c7b14125e7c50e89ac7cec8051 Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Tue, 8 Sep 2020 13:01:41 +0200
Subject: netfilter: nf_tables: add userdata support for nft_object

Enables storing userdata for nft_object. Initially this will store an
optional comment but can be extended in the future as needed.

Adds new attribute NFTA_OBJ_USERDATA to nft_object.

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 2 ++
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 97a7e147a59a..99c1b3188b1e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1123,6 +1123,8 @@ struct nft_object {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
+	u16				udlen;
+	u8				*udata;
 	/* runtime data below here */
 	const struct nft_object_ops	*ops ____cacheline_aligned;
 	unsigned char			data[]
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 543dc697b796..2a6e09dea1a0 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1559,6 +1559,7 @@ enum nft_ct_expectation_attributes {
  * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
  * @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
  * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
+ * @NFTA_OBJ_USERDATA: user data (NLA_BINARY)
  */
 enum nft_object_attributes {
 	NFTA_OBJ_UNSPEC,
@@ -1569,6 +1570,7 @@ enum nft_object_attributes {
 	NFTA_OBJ_USE,
 	NFTA_OBJ_HANDLE,
 	NFTA_OBJ_PAD,
+	NFTA_OBJ_USERDATA,
 	__NFTA_OBJ_MAX
 };
 #define NFTA_OBJ_MAX		(__NFTA_OBJ_MAX - 1)
-- 
cgit v1.2.3


From 245500d853e9f20036cec7df4f6984ece4c6bf26 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 Jul 2020 11:15:32 +0100
Subject: rxrpc: Rewrite the client connection manager

Rewrite the rxrpc client connection manager so that it can support multiple
connections for a given security key to a peer.  The following changes are
made:

 (1) For each open socket, the code currently maintains an rbtree with the
     connections placed into it, keyed by communications parameters.  This
     is tricky to maintain as connections can be culled from the tree or
     replaced within it.  Connections can require replacement for a number
     of reasons, e.g. their IDs span too great a range for the IDR data
     type to represent efficiently, the call ID numbers on that conn would
     overflow or the conn got aborted.

     This is changed so that there's now a connection bundle object placed
     in the tree, keyed on the same parameters.  The bundle, however, does
     not need to be replaced.

 (2) An rxrpc_bundle object can now manage the available channels for a set
     of parallel connections.  The lock that manages this is moved there
     from the rxrpc_connection struct (channel_lock).

 (3) There'a a dummy bundle for all incoming connections to share so that
     they have a channel_lock too.  It might be better to give each
     incoming connection its own bundle.  This bundle is not needed to
     manage which channels incoming calls are made on because that's the
     solely at whim of the client.

 (4) The restrictions on how many client connections are around are
     removed.  Instead, a previous patch limits the number of client calls
     that can be allocated.  Ordinarily, client connections are reaped
     after 2 minutes on the idle queue, but when more than a certain number
     of connections are in existence, the reaper starts reaping them after
     2s of idleness instead to get the numbers back down.

     It could also be made such that new call allocations are forced to
     wait until the number of outstanding connections subsides.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index c33079b986e8..3b67d5981224 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -68,21 +68,14 @@ enum rxrpc_client_trace {
 	rxrpc_client_chan_activate,
 	rxrpc_client_chan_disconnect,
 	rxrpc_client_chan_pass,
-	rxrpc_client_chan_unstarted,
 	rxrpc_client_chan_wait_failed,
 	rxrpc_client_cleanup,
-	rxrpc_client_count,
 	rxrpc_client_discard,
 	rxrpc_client_duplicate,
 	rxrpc_client_exposed,
 	rxrpc_client_replace,
 	rxrpc_client_to_active,
-	rxrpc_client_to_culled,
 	rxrpc_client_to_idle,
-	rxrpc_client_to_inactive,
-	rxrpc_client_to_upgrade,
-	rxrpc_client_to_waiting,
-	rxrpc_client_uncount,
 };
 
 enum rxrpc_call_trace {
@@ -271,29 +264,14 @@ enum rxrpc_tx_point {
 	EM(rxrpc_client_chan_activate,		"ChActv") \
 	EM(rxrpc_client_chan_disconnect,	"ChDisc") \
 	EM(rxrpc_client_chan_pass,		"ChPass") \
-	EM(rxrpc_client_chan_unstarted,		"ChUnst") \
 	EM(rxrpc_client_chan_wait_failed,	"ChWtFl") \
 	EM(rxrpc_client_cleanup,		"Clean ") \
-	EM(rxrpc_client_count,			"Count ") \
 	EM(rxrpc_client_discard,		"Discar") \
 	EM(rxrpc_client_duplicate,		"Duplic") \
 	EM(rxrpc_client_exposed,		"Expose") \
 	EM(rxrpc_client_replace,		"Replac") \
 	EM(rxrpc_client_to_active,		"->Actv") \
-	EM(rxrpc_client_to_culled,		"->Cull") \
-	EM(rxrpc_client_to_idle,		"->Idle") \
-	EM(rxrpc_client_to_inactive,		"->Inac") \
-	EM(rxrpc_client_to_upgrade,		"->Upgd") \
-	EM(rxrpc_client_to_waiting,		"->Wait") \
-	E_(rxrpc_client_uncount,		"Uncoun")
-
-#define rxrpc_conn_cache_states \
-	EM(RXRPC_CONN_CLIENT_INACTIVE,		"Inac") \
-	EM(RXRPC_CONN_CLIENT_WAITING,		"Wait") \
-	EM(RXRPC_CONN_CLIENT_ACTIVE,		"Actv") \
-	EM(RXRPC_CONN_CLIENT_UPGRADE,		"Upgd") \
-	EM(RXRPC_CONN_CLIENT_CULLED,		"Cull") \
-	E_(RXRPC_CONN_CLIENT_IDLE,		"Idle") \
+	E_(rxrpc_client_to_idle,		"->Idle")
 
 #define rxrpc_call_traces \
 	EM(rxrpc_call_connected,		"CON") \
@@ -594,23 +572,20 @@ TRACE_EVENT(rxrpc_client,
 		    __field(int,			channel		)
 		    __field(int,			usage		)
 		    __field(enum rxrpc_client_trace,	op		)
-		    __field(enum rxrpc_conn_cache_state, cs		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->conn = conn->debug_id;
+		    __entry->conn = conn ? conn->debug_id : 0;
 		    __entry->channel = channel;
-		    __entry->usage = atomic_read(&conn->usage);
+		    __entry->usage = conn ? atomic_read(&conn->usage) : -2;
 		    __entry->op = op;
 		    __entry->cid = conn->proto.cid;
-		    __entry->cs = conn->cache_state;
 			   ),
 
-	    TP_printk("C=%08x h=%2d %s %s i=%08x u=%d",
+	    TP_printk("C=%08x h=%2d %s i=%08x u=%d",
 		      __entry->conn,
 		      __entry->channel,
 		      __print_symbolic(__entry->op, rxrpc_client_traces),
-		      __print_symbolic(__entry->cs, rxrpc_conn_cache_states),
 		      __entry->cid,
 		      __entry->usage)
 	    );
-- 
cgit v1.2.3


From 2efbe6aebea00269425ac7de622d47c2a397a871 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:34 +0300
Subject: devlink: Add comment block for missing port attributes

Add comment block for physical, PF and VF port attributes.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8f3c8a443238..3c7ba3e1f490 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -73,6 +73,9 @@ struct devlink_port_pci_vf_attrs {
  * @splittable: indicates if the port can be split.
  * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink.
  * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL
+ * @phys: physical port attributes
+ * @pci_pf: PCI PF port attributes
+ * @pci_vf: PCI VF port attributes
  */
 struct devlink_port_attrs {
 	u8 split:1,
-- 
cgit v1.2.3


From ff03e63ad1673eb75cce214556013fc2e52a1b77 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:35 +0300
Subject: devlink: Move structure comments outside of structure

To add more fields to the PCI PF and VF port attributes, follow standard
structure comment format.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3c7ba3e1f490..efff9274d248 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -57,13 +57,22 @@ struct devlink_port_phys_attrs {
 	u32 split_subport_number; /* If the port is split, this is the number of subport. */
 };
 
+/**
+ * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
+ * @pf: Associated PCI PF number for this port.
+ */
 struct devlink_port_pci_pf_attrs {
-	u16 pf;	/* Associated PCI PF for this port. */
+	u16 pf;
 };
 
+/**
+ * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
+ * @pf: Associated PCI PF number for this port.
+ * @vf: Associated PCI VF for of the PCI PF for this port.
+ */
 struct devlink_port_pci_vf_attrs {
-	u16 pf;	/* Associated PCI PF for this port. */
-	u16 vf;	/* Associated PCI VF for of the PCI PF for this port. */
+	u16 pf;
+	u16 vf;
 };
 
 /**
-- 
cgit v1.2.3


From 05b595e9c44acaca94192c6db430a489c1b212a7 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:36 +0300
Subject: devlink: Introduce external controller flag

A devlink eswitch port may represent PCI PF/VF ports of a controller.

A controller either located on same system or it can be an external
controller located in host where such NIC is plugged in.

Add the ability for driver to specify if a port is for external
controller.

Use such flag in the mlx5_core driver.

An example of an external controller having VF1 of PF0 belong to
controller 1.

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf pfnum 0 vfnum 1 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00
$ devlink port show pci/0000:06:00.0/2 -jp
{
    "port": {
        "pci/0000:06:00.0/2": {
            "type": "eth",
            "netdev": "ens2f0pf0vf1",
            "flavour": "pcivf",
            "pfnum": 0,
            "vfnum": 1,
            "external": true,
            "splittable": false,
            "function": {
                "hw_addr": "00:00:00:00:00:00"
            }
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 8 ++++++--
 include/uapi/linux/devlink.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index efff9274d248..2dad8c9151f4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -60,19 +60,23 @@ struct devlink_port_phys_attrs {
 /**
  * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
  * @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_pf_attrs {
 	u16 pf;
+	u8 external:1;
 };
 
 /**
  * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
  * @pf: Associated PCI PF number for this port.
  * @vf: Associated PCI VF for of the PCI PF for this port.
+ * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_vf_attrs {
 	u16 pf;
 	u16 vf;
+	u8 external:1;
 };
 
 /**
@@ -1215,9 +1219,9 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_attrs_set(struct devlink_port *devlink_port,
 			    struct devlink_port_attrs *devlink_port_attrs);
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf);
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external);
 void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
-				   u16 pf, u16 vf);
+				   u16 pf, u16 vf, bool external);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
 			u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index cfef4245ea5a..40823ed7e05a 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -458,6 +458,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_LANES,			/* u32 */
 	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
 
+	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From 3a2d9588c4f79adae6a0e986b64ebdd5b38085c6 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:37 +0300
Subject: devlink: Introduce controller number

A devlink port may be for a controller consist of PCI device.
A devlink instance holds ports of two types of controllers.
(1) controller discovered on same system where eswitch resides
This is the case where PCI PF/VF of a controller and devlink eswitch
instance both are located on a single system.
(2) controller located on external host system.
This is the case where a controller is located in one system and its
devlink eswitch ports are located in a different system.

When a devlink eswitch instance serves the devlink ports of both
controllers together, PCI PF/VF numbers may overlap.
Due to this a unique phys_port_name cannot be constructed.

For example in below such system controller-0 and controller-1, each has
PCI PF pf0 whose eswitch ports can be present in controller-0.
These results in phys_port_name as "pf0" for both.
Similar problem exists for VFs and upcoming Sub functions.

An example view of two controller systems:

             ---------------------------------------------------------
             |                                                       |
             |           --------- ---------         ------- ------- |
-----------  |           | vf(s) | | sf(s) |         |vf(s)| |sf(s)| |
| server  |  | -------   ----/---- ---/----- ------- ---/--- ---/--- |
| pci rc  |=== | pf0 |______/________/       | pf1 |___/_______/     |
| connect |  | -------                       -------                 |
-----------  |     | controller_num=1 (no eswitch)                   |
             ------|--------------------------------------------------
             (internal wire)
                   |
             ---------------------------------------------------------
             | devlink eswitch ports and reps                        |
             | ----------------------------------------------------- |
             | |ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 |ctrl-0 | |
             | |pf0    | pf0vfN | pf0sfN | pf1    | pf1vfN |pf1sfN | |
             | ----------------------------------------------------- |
             | |ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 |ctrl-1 | |
             | |pf1    | pf1vfN | pf1sfN | pf1    | pf1vfN |pf0sfN | |
             | ----------------------------------------------------- |
             |                                                       |
             |                                                       |
             |           --------- ---------         ------- ------- |
             |           | vf(s) | | sf(s) |         |vf(s)| |sf(s)| |
             | -------   ----/---- ---/----- ------- ---/--- ---/--- |
             | | pf0 |______/________/       | pf1 |___/_______/     |
             | -------                       -------                 |
             |                                                       |
             |  local controller_num=0 (eswitch)                     |
             ---------------------------------------------------------

An example devlink port for external controller with controller
number = 1 for a VF 1 of PF 0:

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf controller 1 pfnum 0 vfnum 1 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00

$ devlink port show pci/0000:06:00.0/2 -jp
{
    "port": {
        "pci/0000:06:00.0/2": {
            "type": "eth",
            "netdev": "ens2f0pf0vf1",
            "flavour": "pcivf",
            "controller": 1,
            "pfnum": 0,
            "vfnum": 1,
            "external": true,
            "splittable": false,
            "function": {
                "hw_addr": "00:00:00:00:00:00"
            }
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 9 +++++++--
 include/uapi/linux/devlink.h | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2dad8c9151f4..eaec0a8cc5ef 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -59,21 +59,25 @@ struct devlink_port_phys_attrs {
 
 /**
  * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
+ * @controller: Associated controller number
  * @pf: Associated PCI PF number for this port.
  * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_pf_attrs {
+	u32 controller;
 	u16 pf;
 	u8 external:1;
 };
 
 /**
  * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
+ * @controller: Associated controller number
  * @pf: Associated PCI PF number for this port.
  * @vf: Associated PCI VF for of the PCI PF for this port.
  * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_vf_attrs {
+	u32 controller;
 	u16 pf;
 	u16 vf;
 	u8 external:1;
@@ -1219,8 +1223,9 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_attrs_set(struct devlink_port *devlink_port,
 			    struct devlink_port_attrs *devlink_port_attrs);
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external);
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, bool external);
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
 				   u16 pf, u16 vf, bool external);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 40823ed7e05a..40d35145c879 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -459,6 +459,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
 
 	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From 501cb008906631a019f3ab2104a17ef8b2651ed0 Mon Sep 17 00:00:00 2001
From: Paul Davey <paul.davey@alliedtelesis.co.nz>
Date: Tue, 8 Sep 2020 10:04:06 +1200
Subject: ipmr: Add route table ID to netlink cache reports

Insert the multicast route table ID as a Netlink attribute to Netlink
cache report notifications.

When multiple route tables are in use it is necessary to have a way to
determine which route table a given cache report belongs to when
receiving the cache report.

Signed-off-by: Paul Davey <paul.davey@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 11c8c1fc1124..918f1ef32ffe 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -169,6 +169,7 @@ enum {
 	IPMRA_CREPORT_SRC_ADDR,
 	IPMRA_CREPORT_DST_ADDR,
 	IPMRA_CREPORT_PKT,
+	IPMRA_CREPORT_TABLE,
 	__IPMRA_CREPORT_MAX
 };
 #define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1)
-- 
cgit v1.2.3


From c8715a8e9f38906e73d6d78764216742db13ba0e Mon Sep 17 00:00:00 2001
From: Paul Davey <paul.davey@alliedtelesis.co.nz>
Date: Tue, 8 Sep 2020 10:04:07 +1200
Subject: ipmr: Add high byte of VIF ID to igmpmsg

Use the unused3 byte in struct igmpmsg to hold the high 8 bits of the
VIF ID.

If using more than 255 IPv4 multicast interfaces it is necessary to have
access to a VIF ID for cache reports that is wider than 8 bits, the VIF
ID present in the igmpmsg reports sent to mroute_sk was only 8 bits wide
in the igmpmsg header.  Adding the high 8 bits of the 16 bit VIF ID in
the unused byte allows use of more than 255 IPv4 multicast interfaces.

Signed-off-by: Paul Davey <paul.davey@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 918f1ef32ffe..1a42f5f9b31b 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -113,8 +113,8 @@ struct igmpmsg {
 	__u32 unused1,unused2;
 	unsigned char im_msgtype;		/* What is this */
 	unsigned char im_mbz;			/* Must be zero */
-	unsigned char im_vif;			/* Interface (this ought to be a vifi_t!) */
-	unsigned char unused3;
+	unsigned char im_vif;			/* Low 8 bits of Interface */
+	unsigned char im_vif_hi;		/* High 8 bits of Interface */
 	struct in_addr im_src,im_dst;
 };
 
-- 
cgit v1.2.3


From 5198d545dba8ad893f5e5a029ca8d43ee7bcf011 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Sep 2020 10:37:51 -0700
Subject: net: remove napi_hash_del() from driver-facing API

We allow drivers to call napi_hash_del() before calling
netif_napi_del() to batch RCU grace periods. This makes
the API asymmetric and leaks internal implementation details.
Soon we will want the grace period to protect more than just
the NAPI hash table.

Restructure the API and have drivers call a new function -
__netif_napi_del() if they want to take care of RCU waits.

Note that only core was checking the return status from
napi_hash_del() so the new helper does not report if the
NAPI was actually deleted.

Some notes on driver oddness:
 - veth observed the grace period before calling netif_napi_del()
   but that should not matter
 - myri10ge observed normal RCU flavor
 - bnx2x and enic did not actually observe the grace period
   (unless they did so implicitly)
 - virtio_net and enic only unhashed Rx NAPIs

The last two points seem to indicate that the calls to
napi_hash_del() were a left over rather than an optimization.
Regardless, it's easy enough to correct them.

This patch may introduce extra synchronize_net() calls for
interfaces which set NAPI_STATE_NO_BUSY_POLL and depend on
free_netdev() to call netif_napi_del(). This seems inevitable
since we want to use RCU for netpoll dev->napi_list traversal,
and almost no drivers set IFF_DISABLE_NETPOLL.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f9fcfd15942..74ed95215091 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -70,6 +70,7 @@ struct udp_tunnel_nic;
 struct bpf_prog;
 struct xdp_buff;
 
+void synchronize_net(void);
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
 
@@ -488,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n)
 	return napi_complete_done(n, 0);
 }
 
-/**
- *	napi_hash_del - remove a NAPI from global table
- *	@napi: NAPI context
- *
- * Warning: caller must observe RCU grace period
- * before freeing memory containing @napi, if
- * this function returns true.
- * Note: core networking stack automatically calls it
- * from netif_napi_del().
- * Drivers might want to call this helper to combine all
- * the needed RCU grace periods into a single one.
- */
-bool napi_hash_del(struct napi_struct *napi);
-
 /**
  *	napi_disable - prevent NAPI from scheduling
  *	@n: NAPI context
@@ -2367,13 +2354,27 @@ static inline void netif_tx_napi_add(struct net_device *dev,
 	netif_napi_add(dev, napi, poll, weight);
 }
 
+/**
+ *  __netif_napi_del - remove a NAPI context
+ *  @napi: NAPI context
+ *
+ * Warning: caller must observe RCU grace period before freeing memory
+ * containing @napi. Drivers might want to call this helper to combine
+ * all the needed RCU grace periods into a single one.
+ */
+void __netif_napi_del(struct napi_struct *napi);
+
 /**
  *  netif_napi_del - remove a NAPI context
  *  @napi: NAPI context
  *
  *  netif_napi_del() removes a NAPI context from the network device NAPI list
  */
-void netif_napi_del(struct napi_struct *napi);
+static inline void netif_napi_del(struct napi_struct *napi)
+{
+	__netif_napi_del(napi);
+	synchronize_net();
+}
 
 struct napi_gro_cb {
 	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
@@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev)
 int netdev_refcnt_read(const struct net_device *dev);
 void free_netdev(struct net_device *dev);
 void netdev_freemem(struct net_device *dev);
-void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
 struct net_device *netdev_get_xmit_slave(struct net_device *dev,
-- 
cgit v1.2.3


From 4d092dd2041a5f328410ad16d63b8606c24e8604 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Sep 2020 10:37:52 -0700
Subject: net: manage napi add/del idempotence explicitly

To RCUify napi->dev_list we need to replace list_del_init()
with list_del_rcu(). There is no _init() version for RCU for
obvious reasons. Up until now netif_napi_del() was idempotent
so to make sure it remains such add a bit which is set when
NAPI is listed, and cleared when it removed. Since we don't
expect multiple calls to netif_napi_add() to be correct,
add a warning on that side.

Now that napi_hash_add / napi_hash_del are only called by
napi_add / del we can actually steal its bit. We just need
to make sure hash node is initialized correctly.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 74ed95215091..157e0242e9ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -355,7 +355,7 @@ enum {
 	NAPI_STATE_MISSED,	/* reschedule a napi */
 	NAPI_STATE_DISABLE,	/* Disable pending */
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
-	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
+	NAPI_STATE_LISTED,	/* NAPI added to system lists */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
 	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
 };
@@ -365,7 +365,7 @@ enum {
 	NAPIF_STATE_MISSED	 = BIT(NAPI_STATE_MISSED),
 	NAPIF_STATE_DISABLE	 = BIT(NAPI_STATE_DISABLE),
 	NAPIF_STATE_NPSVC	 = BIT(NAPI_STATE_NPSVC),
-	NAPIF_STATE_HASHED	 = BIT(NAPI_STATE_HASHED),
+	NAPIF_STATE_LISTED	 = BIT(NAPI_STATE_LISTED),
 	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
 	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
 };
-- 
cgit v1.2.3


From e9b12edc133b54e15ecd105620d51fb8e8fa8bde Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Wed, 9 Sep 2020 17:50:46 -0700
Subject: tcp: record received TOS value in the request socket

A new field is added to the request sock to record the TOS value
received on the listening socket during 3WHS:
When not under syn flood, it is recording the TOS value sent in SYN.
When under syn flood, it is recording the TOS value sent in the ACK.
This is a preparation patch in order to do TOS reflection in the later
commit.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 56ff2952edaf..2f87377e9af7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -134,6 +134,7 @@ struct tcp_request_sock {
 						  * FastOpen it's the seq#
 						  * after data-in-SYN.
 						  */
+	u8				syn_tos;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
-- 
cgit v1.2.3


From de033b7d1568a8f1252055c96cdd99954d5450c4 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Wed, 9 Sep 2020 17:50:47 -0700
Subject: ip: pass tos into ip_build_and_send_pkt()

This commit adds tos as a new passed in parameter to
ip_build_and_send_pkt() which will be used in the later commit.
This is a pure restructure and does not have any functional change.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index b09c48d862cc..0f72bf8c0cbf 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -151,7 +151,7 @@ int igmp_mc_init(void);
 
 int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 			  __be32 saddr, __be32 daddr,
-			  struct ip_options_rcu *opt);
+			  struct ip_options_rcu *opt, u8 tos);
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	   struct net_device *orig_dev);
 void ip_list_rcv(struct list_head *head, struct packet_type *pt,
-- 
cgit v1.2.3


From ac8f1710c12bb4c3626280ce03f05459ba8feef6 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Wed, 9 Sep 2020 17:50:48 -0700
Subject: tcp: reflect tos value received in SYN to the socket

This commit adds a new TCP feature to reflect the tos value received in
SYN, and send it out on the SYN-ACK, and eventually set the tos value of
the established socket with this reflected tos value. This provides a
way to set the traffic class/QoS level for all traffic in the same
connection to be the same as the incoming SYN request. It could be
useful in data centers to provide equivalent QoS according to the
incoming request.
This feature is guarded by /proc/sys/net/ipv4/tcp_reflect_tos, and is by
default turned off.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9e36738c1fe1..8e4fcac4df72 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -183,6 +183,7 @@ struct netns_ipv4 {
 	unsigned int sysctl_tcp_fastopen_blackhole_timeout;
 	atomic_t tfo_active_disable_times;
 	unsigned long tfo_active_disable_stamp;
+	int sysctl_tcp_reflect_tos;
 
 	int sysctl_udp_wmem_min;
 	int sysctl_udp_rmem_min;
-- 
cgit v1.2.3


From d66423fbe11e141206f117b232916aa899d44959 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 10 Sep 2020 12:02:48 +0100
Subject: bpf: Plug hole in struct bpf_sk_lookup_kern

As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
This leads to suboptimal instructions being generated (IPv4, x86):

    1372                    struct bpf_sk_lookup_kern ctx = {
       0xffffffff81b87f30 <+624>:   xor    %eax,%eax
       0xffffffff81b87f32 <+626>:   mov    $0x6,%ecx
       0xffffffff81b87f37 <+631>:   lea    0x90(%rsp),%rdi
       0xffffffff81b87f3f <+639>:   movl   $0x110002,0x88(%rsp)
       0xffffffff81b87f4a <+650>:   rep stos %rax,%es:(%rdi)
       0xffffffff81b87f4d <+653>:   mov    0x8(%rsp),%eax
       0xffffffff81b87f51 <+657>:   mov    %r13d,0x90(%rsp)
       0xffffffff81b87f59 <+665>:   incl   %gs:0x7e4970a0(%rip)
       0xffffffff81b87f60 <+672>:   mov    %eax,0x8c(%rsp)
       0xffffffff81b87f67 <+679>:   movzwl 0x10(%rsp),%eax
       0xffffffff81b87f6c <+684>:   mov    %ax,0xa8(%rsp)
       0xffffffff81b87f74 <+692>:   movzwl 0x38(%rsp),%eax
       0xffffffff81b87f79 <+697>:   mov    %ax,0xaa(%rsp)

Fix this by moving around sport and dport. pahole confirms there
are no more holes:

    struct bpf_sk_lookup_kern {
        u16                        family;       /*     0     2 */
        u16                        protocol;     /*     2     2 */
        __be16                     sport;        /*     4     2 */
        u16                        dport;        /*     6     2 */
        struct {
                __be32             saddr;        /*     8     4 */
                __be32             daddr;        /*    12     4 */
        } v4;                                    /*     8     8 */
        struct {
                const struct in6_addr  * saddr;  /*    16     8 */
                const struct in6_addr  * daddr;  /*    24     8 */
        } v6;                                    /*    16    16 */
        struct sock *              selected_sk;  /*    32     8 */
        bool                       no_reuseport; /*    40     1 */

        /* size: 48, cachelines: 1, members: 8 */
        /* padding: 7 */
        /* last cacheline: 48 bytes */
    };

The assembly also doesn't contain the pesky rep stos anymore:

    1372                    struct bpf_sk_lookup_kern ctx = {
       0xffffffff81b87f60 <+624>:   movzwl 0x10(%rsp),%eax
       0xffffffff81b87f65 <+629>:   movq   $0x0,0xa8(%rsp)
       0xffffffff81b87f71 <+641>:   movq   $0x0,0xb0(%rsp)
       0xffffffff81b87f7d <+653>:   mov    %ax,0x9c(%rsp)
       0xffffffff81b87f85 <+661>:   movzwl 0x38(%rsp),%eax
       0xffffffff81b87f8a <+666>:   movq   $0x0,0xb8(%rsp)
       0xffffffff81b87f96 <+678>:   mov    %ax,0x9e(%rsp)
       0xffffffff81b87f9e <+686>:   mov    0x8(%rsp),%eax
       0xffffffff81b87fa2 <+690>:   movq   $0x0,0xc0(%rsp)
       0xffffffff81b87fae <+702>:   movl   $0x110002,0x98(%rsp)
       0xffffffff81b87fb9 <+713>:   mov    %eax,0xa0(%rsp)
       0xffffffff81b87fc0 <+720>:   mov    %r13d,0xa4(%rsp)

1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/

Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20200910110248.198326-1-lmb@cloudflare.com
---
 include/linux/filter.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 995625950cc1..e962dd8117d8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1287,6 +1287,8 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
 struct bpf_sk_lookup_kern {
 	u16		family;
 	u16		protocol;
+	__be16		sport;
+	u16		dport;
 	struct {
 		__be32 saddr;
 		__be32 daddr;
@@ -1295,8 +1297,6 @@ struct bpf_sk_lookup_kern {
 		const struct in6_addr *saddr;
 		const struct in6_addr *daddr;
 	} v6;
-	__be16		sport;
-	u16		dport;
 	struct sock	*selected_sk;
 	bool		no_reuseport;
 };
-- 
cgit v1.2.3


From 1aef5b4391f0c75c0a1523706a7b0311846ee12f Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 10 Sep 2020 13:33:14 -0700
Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup()

This should be "current" not "skb".

Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)")
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com
---
 include/uapi/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 90359cab501d..7dd314176df7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1447,8 +1447,8 @@ union bpf_attr {
  * 	Return
  * 		The return value depends on the result of the test, and can be:
  *
- * 		* 0, if the *skb* task belongs to the cgroup2.
- * 		* 1, if the *skb* task does not belong to the cgroup2.
+ *		* 0, if current task belongs to the cgroup2.
+ *		* 1, if current task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
  * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
-- 
cgit v1.2.3


From 8919a9b31eb4fb4c0a93e5fb350a626924302aa6 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 10 Sep 2020 15:35:32 -0400
Subject: tcp: Only init congestion control if not initialized already

Change tcp_init_transfer() to only initialize congestion control if it
has not been initialized already.

With this new approach, we can arrange things so that if the EBPF code
sets the congestion control by calling setsockopt(TCP_CONGESTION) then
tcp_init_transfer() will not re-initialize the CC module.

This is an approach that has the following beneficial properties:

(1) This allows CC module customizations made by the EBPF called in
    tcp_init_transfer() to persist, and not be wiped out by a later
    call to tcp_init_congestion_control() in tcp_init_transfer().

(2) Does not flip the order of EBPF and CC init, to avoid causing bugs
    for existing code upstream that depends on the current order.

(3) Does not cause 2 initializations for for CC in the case where the
    EBPF called in tcp_init_transfer() wants to set the CC to a new CC
    algorithm.

(4) Allows follow-on simplifications to the code in net/core/filter.c
    and net/ipv4/tcp_cong.c, which currently both have some complexity
    to special-case CC initialization to avoid double CC
    initialization if EBPF sets the CC.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Kevin Yang <yyd@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
---
 include/net/inet_connection_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c738abeb3265..dc763ca9413c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -96,7 +96,8 @@ struct inet_connection_sock {
 	void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
 	struct hlist_node         icsk_listen_portaddr_node;
 	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
-	__u8			  icsk_ca_state:6,
+	__u8			  icsk_ca_state:5,
+				  icsk_ca_initialized:1,
 				  icsk_ca_setsockopt:1,
 				  icsk_ca_dst_locked:1;
 	__u8			  icsk_retransmits;
-- 
cgit v1.2.3


From 29a949325c6c90f1431db9af64592275c83d9b2a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 10 Sep 2020 15:35:34 -0400
Subject: tcp: simplify tcp_set_congestion_control(): Always reinitialize

Now that the previous patches ensure that all call sites for
tcp_set_congestion_control() want to initialize congestion control, we
can simplify tcp_set_congestion_control() by removing the reinit
argument and the code to support it.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Kevin Yang <yyd@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e85d564446c6..f857146c17a5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1104,7 +1104,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
-			       bool reinit, bool cap_net_admin);
+			       bool cap_net_admin);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
-- 
cgit v1.2.3


From d5ea32da878d4572761ca4580631748e398542de Mon Sep 17 00:00:00 2001
From: Daniel Winkler <danielwinkler@google.com>
Date: Tue, 25 Aug 2020 16:31:51 -0700
Subject: Bluetooth: Add MGMT capability flags for tx power and ext advertising

For new advertising features, it will be important for userspace to
know the capabilities of the controller and kernel. If the controller
and kernel support extended advertising, we include flags indicating
hardware offloading support and support for setting tx power of adv
instances.

In the future, vendor-specific commands may allow the setting of tx
power in advertising instances, but for now this feature is only
marked available if extended advertising is supported.

This change is manually verified in userspace by ensuring the
advertising manager's supported_flags field is updated with new flags on
hatch chromebook (ext advertising supported).

Signed-off-by: Daniel Winkler <danielwinkler@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index beae5c3980f0..9ad505b9e694 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -572,6 +572,8 @@ struct mgmt_rp_add_advertising {
 #define MGMT_ADV_FLAG_SEC_1M 		BIT(7)
 #define MGMT_ADV_FLAG_SEC_2M 		BIT(8)
 #define MGMT_ADV_FLAG_SEC_CODED 	BIT(9)
+#define MGMT_ADV_FLAG_CAN_SET_TX_POWER	BIT(10)
+#define MGMT_ADV_FLAG_HW_OFFLOAD	BIT(11)
 
 #define MGMT_ADV_FLAG_SEC_MASK	(MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
 				 MGMT_ADV_FLAG_SEC_CODED)
-- 
cgit v1.2.3


From 568a36a69bad4f2efcfa4f94c83aa150a463735c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 10 Sep 2020 19:48:54 +0300
Subject: net: dsa: tag_8021q: include missing refcount.h

The previous assumption was that the caller would already have this
header file included.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 311aa04e7520..804750122c66 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -5,6 +5,7 @@
 #ifndef _NET_DSA_8021Q_H
 #define _NET_DSA_8021Q_H
 
+#include <linux/refcount.h>
 #include <linux/types.h>
 
 struct dsa_switch;
-- 
cgit v1.2.3


From 7e092af2f3b33694b9117ffd978d42b04ec4f260 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 10 Sep 2020 19:48:55 +0300
Subject: net: dsa: tag_8021q: setup tagging via a single function call

There is no point in calling dsa_port_setup_8021q_tagging for each
individual port. Additionally, it will become more difficult to do that
when we'll have a context structure to tag_8021q (next patch). So
refactor this now.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 804750122c66..8586d8cdf956 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -25,8 +25,7 @@ struct dsa_8021q_crosschip_link {
 
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
 
-int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
-				 bool enabled);
+int dsa_8021q_setup(struct dsa_switch *ds, bool enabled);
 
 int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 				    struct dsa_switch *other_ds,
@@ -57,8 +56,7 @@ bool vid_is_dsa_8021q(u16 vid);
 
 #else
 
-int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
-				 bool enabled)
+int dsa_8021q_setup(struct dsa_switch *ds, bool enabled)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 5899ee367ab3fec885aa04d9a2b573bf2e464e7f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 10 Sep 2020 19:48:56 +0300
Subject: net: dsa: tag_8021q: add a context structure

While working on another tag_8021q driver implementation, some things
became apparent:

- It is not mandatory for a DSA driver to offload the tag_8021q VLANs by
  using the VLAN table per se. For example, it can add custom TCAM rules
  that simply encapsulate RX traffic, and redirect & decapsulate rules
  for TX traffic. For such a driver, it makes no sense to receive the
  tag_8021q configuration through the same callback as it receives the
  VLAN configuration from the bridge and the 8021q modules.

- Currently, sja1105 (the only tag_8021q user) sets a
  priv->expect_dsa_8021q variable to distinguish between the bridge
  calling, and tag_8021q calling. That can be improved, to say the
  least.

- The crosschip bridging operations are, in fact, stateful already. The
  list of crosschip_links must be kept by the caller and passed to the
  relevant tag_8021q functions.

So it would be nice if the tag_8021q configuration was more
self-contained. This patch attempts to do that.

Create a struct dsa_8021q_context which encapsulates a struct
dsa_switch, and has 2 function pointers for adding and deleting a VLAN.
These will replace the previous channel to the driver, which was through
the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops.

Also put the list of crosschip_links into this dsa_8021q_context.
Drivers that don't support cross-chip bridging can simply omit to
initialize this list, as long as they dont call any cross-chip function.

The sja1105_vlan_add and sja1105_vlan_del functions are refactored into
a smaller sja1105_vlan_add_one, which now has 2 entry points:
- sja1105_vlan_add, from struct dsa_switch_ops
- sja1105_dsa_8021q_vlan_add, from the tag_8021q ops
But even this change is fairly trivial. It just reflects the fact that
for sja1105, the VLANs from these 2 channels end up in the same hardware
table. However that is not necessarily true in the general sense (and
that's the reason for making this change).

The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The
dsa_8021q_context structure needs to be propagated because adding a VLAN
is now done through the ops function pointers inside of it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 8586d8cdf956..2b003ae9fb38 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -12,30 +12,40 @@ struct dsa_switch;
 struct sk_buff;
 struct net_device;
 struct packet_type;
+struct dsa_8021q_context;
 
 struct dsa_8021q_crosschip_link {
 	struct list_head list;
 	int port;
-	struct dsa_switch *other_ds;
+	struct dsa_8021q_context *other_ctx;
 	int other_port;
 	refcount_t refcount;
 };
 
+struct dsa_8021q_ops {
+	int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags);
+	int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid);
+};
+
+struct dsa_8021q_context {
+	const struct dsa_8021q_ops *ops;
+	struct dsa_switch *ds;
+	struct list_head crosschip_links;
+};
+
 #define DSA_8021Q_N_SUBVLAN			8
 
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
 
-int dsa_8021q_setup(struct dsa_switch *ds, bool enabled);
+int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled);
 
-int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
-				    struct dsa_switch *other_ds,
-				    int other_port,
-				    struct list_head *crosschip_links);
+int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
+				    struct dsa_8021q_context *other_ctx,
+				    int other_port);
 
-int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
-				     struct dsa_switch *other_ds,
-				     int other_port,
-				     struct list_head *crosschip_links);
+int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
+				     struct dsa_8021q_context *other_ctx,
+				     int other_port);
 
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 			       u16 tpid, u16 tci);
@@ -56,23 +66,21 @@ bool vid_is_dsa_8021q(u16 vid);
 
 #else
 
-int dsa_8021q_setup(struct dsa_switch *ds, bool enabled)
+int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
 {
 	return 0;
 }
 
-int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
-				    struct dsa_switch *other_ds,
-				    int other_port,
-				    struct list_head *crosschip_links)
+int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
+				    struct dsa_8021q_context *other_ctx,
+				    int other_port)
 {
 	return 0;
 }
 
-int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
-				     struct dsa_switch *other_ds,
-				     int other_port,
-				     struct list_head *crosschip_links)
+int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
+				     struct dsa_8021q_context *other_ctx,
+				     int other_port)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 1623ad8ec04c771a54975fb84b22bc21c2dbcac1 Mon Sep 17 00:00:00 2001
From: Divya Koppera <Divya.Koppera@microchip.com>
Date: Fri, 11 Sep 2020 18:48:44 +0530
Subject: net: phy: mchp: Add support for LAN8814 QUAD PHY

LAN8814 is a low-power, quad-port triple-speed (10BASE-T/100BASETX/1000BASE-T)
Ethernet physical layer transceiver (PHY). It supports transmission and
reception of data on standard CAT-5, as well as CAT-5e and CAT-6, unshielded
twisted pair (UTP) cables.

LAN8814 supports industry-standard QSGMII (Quad Serial Gigabit Media
Independent Interface) and Q-USGMII (Quad Universal Serial Gigabit Media
Independent Interface) providing chip-to-chip connection to four Gigabit
Ethernet MACs using a single serialized link (differential pair) in each
direction.

The LAN8814 SKU supports high-accuracy timestamping functions to
support IEEE-1588 solutions using Microchip Ethernet switches, as well as
customer solutions based on SoCs and FPGAs.

The LAN8804 SKU has same features as that of LAN8814 SKU except that it does
not support 1588, SyncE, or Q-USGMII with PCH/MCH.

This adds support for 10BASE-T, 100BASE-TX, and 1000BASE-T,
QSGMII link with the MAC.

Signed-off-by: Divya Koppera<divya.koppera@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 75f880c25bb8..416ee6dd2574 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -27,6 +27,7 @@
 #define PHY_ID_KSZ8061		0x00221570
 #define PHY_ID_KSZ9031		0x00221620
 #define PHY_ID_KSZ9131		0x00221640
+#define PHY_ID_LAN8814		0x00221660
 
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
-- 
cgit v1.2.3


From 346ce5b7d624e8cc2ec5a6abd0ea00f0e06ea8ac Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Fri, 11 Sep 2020 14:07:11 -0700
Subject: Bluetooth: Add mgmt suspend and resume events

Add the controller suspend and resume events, which will signal when
Bluetooth has completed preparing for suspend and when it's ready for
resume.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Reviewed-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  3 +++
 include/net/bluetooth/mgmt.h     | 11 +++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 8caac20556b4..02a6ee056b23 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1750,6 +1750,9 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		      u8 addr_type, s8 rssi, u8 *name, u8 name_len);
 void mgmt_discovering(struct hci_dev *hdev, u8 discovering);
+void mgmt_suspending(struct hci_dev *hdev, u8 state);
+void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr,
+		   u8 addr_type);
 bool mgmt_powering_down(struct hci_dev *hdev);
 void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent);
 void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 9ad505b9e694..e19e33c7b65c 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -1030,3 +1030,14 @@ struct mgmt_ev_adv_monitor_added {
 struct mgmt_ev_adv_monitor_removed {
 	__le16 monitor_handle;
 }  __packed;
+
+#define MGMT_EV_CONTROLLER_SUSPEND		0x002d
+struct mgmt_ev_controller_suspend {
+	__u8	suspend_state;
+} __packed;
+
+#define MGMT_EV_CONTROLLER_RESUME		0x002e
+struct mgmt_ev_controller_resume {
+	__u8	wake_reason;
+	struct mgmt_addr_info addr;
+} __packed;
-- 
cgit v1.2.3


From f0cfc486f796dc3b67a4017357a6a8e76a8141c5 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Fri, 11 Sep 2020 14:07:12 -0700
Subject: Bluetooth: Add suspend reason for device disconnect

Update device disconnect event with reason 0x5 to indicate that device
disconnected because the controller is suspending.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Reviewed-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e19e33c7b65c..a4b8935e0db9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -842,6 +842,7 @@ struct mgmt_ev_device_connected {
 #define MGMT_DEV_DISCONN_LOCAL_HOST	0x02
 #define MGMT_DEV_DISCONN_REMOTE		0x03
 #define MGMT_DEV_DISCONN_AUTH_FAILURE	0x04
+#define MGMT_DEV_DISCONN_LOCAL_HOST_SUSPEND	0x05
 
 #define MGMT_EV_DEVICE_DISCONNECTED	0x000C
 struct mgmt_ev_device_disconnected {
-- 
cgit v1.2.3


From 2f20216c1d6fe17c1a224f658be0dc6fab2269c7 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Fri, 11 Sep 2020 14:07:13 -0700
Subject: Bluetooth: Emit controller suspend and resume events

Emit controller suspend and resume events when we are ready for suspend
and we've resumed from suspend.

The controller suspend event will report whatever suspend state was
successfully entered. The controller resume event will check the first
HCI event that was received after we finished preparing for suspend and,
if it was a connection event, store the address of the peer that caused
the event. If it was not a connection event, we mark the wake reason as
an unexpected event.

Here is a sample btmon trace with these events:

@ MGMT Event: Controller Suspended (0x002d) plen 1
        Suspend state: Page scanning and/or passive scanning (2)

@ MGMT Event: Controller Resumed (0x002e) plen 8
        Wake reason: Remote wake due to peer device connection (2)
        LE Address: CD:F3:CD:13:C5:9A (OUI CD-F3-CD)

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 +++
 include/net/bluetooth/mgmt.h     | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 02a6ee056b23..9873e1c8cd16 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -484,6 +484,9 @@ struct hci_dev {
 	enum suspended_state	suspend_state;
 	bool			scanning_paused;
 	bool			suspended;
+	u8			wake_reason;
+	bdaddr_t		wake_addr;
+	u8			wake_addr_type;
 
 	wait_queue_head_t	suspend_wait_q;
 	DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index a4b8935e0db9..6b55155e05e9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -1042,3 +1042,7 @@ struct mgmt_ev_controller_resume {
 	__u8	wake_reason;
 	struct mgmt_addr_info addr;
 } __packed;
+
+#define MGMT_WAKE_REASON_NON_BT_WAKE		0x0
+#define MGMT_WAKE_REASON_UNEXPECTED		0x1
+#define MGMT_WAKE_REASON_REMOTE_WAKE		0x2
-- 
cgit v1.2.3


From 96a9c425e234aac193afb9da6ca11fafc679c7e2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 14 Sep 2020 13:02:58 +0100
Subject: rxrpc: Fix a missing NULL-pointer check in a trace

Fix the rxrpc_client tracepoint to not dereference conn to get the cid if
conn is NULL, as it does for other fields.

	RIP: 0010:trace_event_raw_event_rxrpc_client+0x7e/0xe0 [rxrpc]
	Call Trace:
	 rxrpc_activate_channels+0x62/0xb0 [rxrpc]
	 rxrpc_connect_call+0x481/0x650 [rxrpc]
	 ? wake_up_q+0xa0/0xa0
	 ? rxrpc_kernel_begin_call+0x12a/0x1b0 [rxrpc]
	 rxrpc_new_client_call+0x2a5/0x5e0 [rxrpc]

Fixes: 245500d853e9 ("rxrpc: Rewrite the client connection manager")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---
 include/trace/events/rxrpc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 3b67d5981224..e70c90116eda 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -579,7 +579,7 @@ TRACE_EVENT(rxrpc_client,
 		    __entry->channel = channel;
 		    __entry->usage = conn ? atomic_read(&conn->usage) : -2;
 		    __entry->op = op;
-		    __entry->cid = conn->proto.cid;
+		    __entry->cid = conn ? conn->proto.cid : 0;
 			   ),
 
 	    TP_printk("C=%08x h=%2d %s i=%08x u=%d",
-- 
cgit v1.2.3


From 1fa5cef283420b3dad93cd6ab04d7125bc1562de Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 18 Aug 2020 15:07:57 +0800
Subject: i40e: optimise prefetch page refcount

refcount of rx_buffer page will be added here originally, so prefetchw
is needed, but after commit 1793668c3b8c ("i40e/i40evf: Update code to
better handle incrementing page count"), and refcount is not added
every time, so change prefetchw as prefetch.

Now it mainly services page_address(), but which accesses struct page
only when WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL is defined otherwise
it returns address based on offset, so we prefetch it conditionally.

Jakub suggested to define prefetch_page_address in a common header.

Reported-by: kernel test robot <lkp@intel.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/prefetch.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 13eafebf3549..b83a3f944f28 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
+struct page;
 /*
 	prefetch(x) attempts to pre-emptively get the memory pointed to
 	by address "x" into the CPU L1 cache. 
@@ -62,4 +63,11 @@ static inline void prefetch_range(void *addr, size_t len)
 #endif
 }
 
+static inline void prefetch_page_address(struct page *page)
+{
+#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL)
+	prefetch(page);
+#endif
+}
+
 #endif
-- 
cgit v1.2.3


From c76c6956566f974bac2470bd72fc22fb923e04a1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 14 Sep 2020 10:01:18 +0200
Subject: mptcp: call tcp_cleanup_rbuf on subflows

That is needed to let the subflows announce promptly when new
space is available in the receive buffer.

tcp_cleanup_rbuf() is currently a static function, drop the
scope modifier and add a declaration in the TCP header.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e85d564446c6..852f0d71dd40 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1414,6 +1414,8 @@ static inline int tcp_full_space(const struct sock *sk)
 	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
 
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
+
 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
  * SO_RCVLOWAT constraint, since we are receiving skbs with too small
-- 
cgit v1.2.3


From 0cbe6a8f089e5912a577537c97833546d558c357 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Sep 2020 03:20:27 -0700
Subject: tcp: remove SOCK_QUEUE_SHRUNK

SOCK_QUEUE_SHRUNK is currently used by TCP as a temporary state
that remembers if some room has been made in the rtx queue
by an incoming ACK packet.

This is later used from tcp_check_space() before
considering to send EPOLLOUT.

Problem is: If we receive SACK packets, and no packet
is removed from RTX queue, we can send fresh packets, thus
moving them from write queue to rtx queue and eventually
empty the write queue.

This stall can happen if TCP_NOTSENT_LOWAT is used.

With this fix, we no longer risk stalling sends while holes
are repaired, and we can fully use socket sndbuf.

This also removes a cache line dirtying for typical RPC
workloads.

Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 7dd3051551fb..eaa5cac5e836 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -845,7 +845,6 @@ enum sock_flags {
 	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
 	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
-	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
 	SOCK_MEMALLOC, /* VM depends on this socket for swapping */
 	SOCK_TIMESTAMPING_RX_SOFTWARE,  /* %SOF_TIMESTAMPING_RX_SOFTWARE */
 	SOCK_FASYNC, /* fasync() active */
@@ -1526,7 +1525,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
 DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
-	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	sk_wmem_queued_add(sk, -skb->truesize);
 	sk_mem_uncharge(sk, skb->truesize);
 	if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
-- 
cgit v1.2.3


From fb609b5112bd74b4ba93c86d7af4089ffd9432c2 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Wed, 13 May 2020 11:06:47 +0300
Subject: net/mlx5: Always use container_of to find mdev pointer from clock
 struct

Clock struct is part of struct mlx5_core_dev. Code was inconsistent, on
some cases used container_of and on another used clock->mdev.

Align code to use container_of amd remove clock->mdev pointer.
While here, fix reverse xmas tree coding style.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
---
 include/linux/mlx5/driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c145de0473bc..8dc3da6e6480 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -643,7 +643,6 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
-	struct mlx5_core_dev      *mdev;
 	struct mlx5_nb             pps_nb;
 	seqlock_t                  lock;
 	struct cyclecounter        cycles;
-- 
cgit v1.2.3


From b7cf0806e8783e38f881cae3c56f0869e70b8da2 Mon Sep 17 00:00:00 2001
From: Ofer Levi <oferle@mellanox.com>
Date: Sun, 17 May 2020 10:16:49 +0300
Subject: net/mlx5e: Add CQE compression support for multi-strides packets

Add CQE compression support for completions of packets that span
multiple strides in a Striding RQ, per the HW capability.
In our memory model, we use small strides (256B as of today) for the
non-linear SKB mode. This feature allows CQE compression to work also
for multiple strides packets. In this case decompressing the mini CQE
array will use stride index provided by HW as part of the mini CQE.
Before this feature, compression was possible only for single-strided
packets, i.e. for packets of size up to 256 bytes when in non-linear
mode, and the index was maintained by SW.
This feature is supported for ConnectX-5 and above.

Feature performance test:
This was whitebox-tested, we reduced the PCI speed from 125Gb/s to
62.5Gb/s to overload pci and manipulated mlx5 driver to drop incoming
packets before building the SKB to achieve low cpu utilization.
Outcome is low cpu utilization and bottleneck on pci only.
Test setup:
Server: Intel(R) Xeon(R) Silver 4108 CPU @ 1.80GHz server, 32 cores
NIC: ConnectX-6 DX.
Sender side generates 300 byte packets at full pci bandwidth.
Receiver side configuration:
Single channel, one cpu processing with one ring allocated. Cpu utilization
is ~20% while pci bandwidth is fully utilized.
For the generated traffic and interface MTU of 4500B (to activate the
non-linear SKB mode), packet rate improvement is about 19% from ~17.6Mpps
to ~21Mpps.
Without this feature, counters show no CQE compression blocks for
this setup, while with the feature, counters show ~20.7Mpps compressed CQEs
in ~500K compression blocks.

Signed-off-by: Ofer Levi <oferle@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4d3376e20f5e..81ca5989009b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -816,7 +816,7 @@ struct mlx5_mini_cqe8 {
 		__be32 rx_hash_result;
 		struct {
 			__be16 checksum;
-			__be16 rsvd;
+			__be16 stridx;
 		};
 		struct {
 			__be16 wqe_counter;
@@ -836,6 +836,7 @@ enum {
 
 enum {
 	MLX5_CQE_FORMAT_CSUM = 0x1,
+	MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3,
 };
 
 #define MLX5_MINI_CQE_ARRAY_SIZE 8
-- 
cgit v1.2.3


From d05e8e68b07cef9d52bbf53e75fa5faea81e1da6 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Thu, 10 Sep 2020 19:23:48 +0200
Subject: bridge: Add SWITCHDEV_FDB_FLUSH_TO_BRIDGE notifier

so the switchdev can notifiy the bridge to flush non-permanent fdb entries
for this port. This is useful whenever the hardware fdb of the switchdev
is reset, but the netdev and the bridgeport are not deleted.

Note that this has the same effect as the IFLA_BRPORT_FLUSH attribute.

CC: Jiri Pirko <jiri@resnulli.us>
CC: Ivan Vecera <ivecera@redhat.com>
CC: Roopa Prabhu <roopa@nvidia.com>
CC: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Acked-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ff2246914301..53e8b4994296 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -203,6 +203,7 @@ enum switchdev_notifier_type {
 	SWITCHDEV_FDB_ADD_TO_DEVICE,
 	SWITCHDEV_FDB_DEL_TO_DEVICE,
 	SWITCHDEV_FDB_OFFLOADED,
+	SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
 
 	SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */
 	SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */
-- 
cgit v1.2.3


From 9a27a33027f22a716ce362be48d70ae0eb012ab7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 14 Sep 2020 17:11:52 -0700
Subject: ethtool: add standard pause stats

Currently drivers have to report their pause frames statistics
via ethtool -S, and there is a wide variety of names used for
these statistics.

Add the two statistics defined in IEEE 802.3x to the standard
API. Create a new ethtool request header flag for including
statistics in the response to GET commands.

Always create the ETHTOOL_A_PAUSE_STATS nest in replies when
flag is set. Testing if driver declares the op is not a reliable
way of checking if any stats will actually be included and therefore
we don't want to give the impression that presence of
ETHTOOL_A_PAUSE_STATS indicates driver support.

Note that this patch does not include PFC counters, which may fit
better in dcbnl? But mostly I don't need them/have a setup to test
them so I haven't looked deeply into exposing them :)

v3:
 - add a helper for "uninitializing" stats, rather than a cryptic
   memset() (Andrew)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              | 26 ++++++++++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h | 18 +++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 969a80211df6..060b20f0b20f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -241,6 +241,27 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 	 ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \
 	 ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL)
 
+#define ETHTOOL_STAT_NOT_SET	(~0ULL)
+
+/**
+ * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames
+ * @tx_pause_frames: transmitted pause frame count. Reported to user space
+ *	as %ETHTOOL_A_PAUSE_STAT_TX_FRAMES.
+ *
+ *	Equivalent to `30.3.4.2 aPAUSEMACCtrlFramesTransmitted`
+ *	from the standard.
+ *
+ * @rx_pause_frames: received pause frame count. Reported to user space
+ *	as %ETHTOOL_A_PAUSE_STAT_RX_FRAMES. Equivalent to:
+ *
+ *	Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived`
+ *	from the standard.
+ */
+struct ethtool_pause_stats {
+	u64 tx_pause_frames;
+	u64 rx_pause_frames;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @supported_coalesce_params: supported types of interrupt coalescing.
@@ -282,6 +303,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *	Returns a negative error code or zero.
  * @get_ringparam: Report ring sizes
  * @set_ringparam: Set ring sizes.  Returns a negative error code or zero.
+ * @get_pause_stats: Report pause frame statistics. Drivers must not zero
+ *	statistics which they don't report. The stats structure is initialized
+ *	to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics.
  * @get_pauseparam: Report pause parameters
  * @set_pauseparam: Set pause parameters.  Returns a negative error code
  *	or zero.
@@ -418,6 +442,8 @@ struct ethtool_ops {
 				 struct ethtool_ringparam *);
 	int	(*set_ringparam)(struct net_device *,
 				 struct ethtool_ringparam *);
+	void	(*get_pause_stats)(struct net_device *dev,
+				   struct ethtool_pause_stats *pause_stats);
 	void	(*get_pauseparam)(struct net_device *,
 				  struct ethtool_pauseparam*);
 	int	(*set_pauseparam)(struct net_device *,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 5dcd24cb33ea..9cee6df01a10 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -91,9 +91,12 @@ enum {
 #define ETHTOOL_FLAG_COMPACT_BITSETS	(1 << 0)
 /* provide optional reply for SET or ACT requests */
 #define ETHTOOL_FLAG_OMIT_REPLY	(1 << 1)
+/* request statistics, if supported by the driver */
+#define ETHTOOL_FLAG_STATS		(1 << 2)
 
 #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
-			  ETHTOOL_FLAG_OMIT_REPLY)
+			  ETHTOOL_FLAG_OMIT_REPLY | \
+			  ETHTOOL_FLAG_STATS)
 
 enum {
 	ETHTOOL_A_HEADER_UNSPEC,
@@ -376,12 +379,25 @@ enum {
 	ETHTOOL_A_PAUSE_AUTONEG,			/* u8 */
 	ETHTOOL_A_PAUSE_RX,				/* u8 */
 	ETHTOOL_A_PAUSE_TX,				/* u8 */
+	ETHTOOL_A_PAUSE_STATS,				/* nest - _PAUSE_STAT_* */
 
 	/* add new constants above here */
 	__ETHTOOL_A_PAUSE_CNT,
 	ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_PAUSE_STAT_UNSPEC,
+	ETHTOOL_A_PAUSE_STAT_PAD,
+
+	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
+	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+
+	/* add new constants above here */
+	__ETHTOOL_A_PAUSE_STAT_CNT,
+	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
+};
+
 /* EEE */
 
 enum {
-- 
cgit v1.2.3


From e2ce94dc1d89e0f76ddd202cea72e0f505083d0a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Tue, 15 Sep 2020 11:40:57 +0300
Subject: devlink: introduce the health reporter test command

Introduce a test command for health reporters. User might use this
command to trigger test event on a reporter if the reporter supports it.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 3 +++
 include/uapi/linux/devlink.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index eaec0a8cc5ef..48b1c1ef1ebd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -566,6 +566,7 @@ enum devlink_health_reporter_state {
  * @dump: callback to dump an object
  *        if priv_ctx is NULL, run a full dump
  * @diagnose: callback to diagnose the current status
+ * @test: callback to trigger a test event
  */
 
 struct devlink_health_reporter_ops {
@@ -578,6 +579,8 @@ struct devlink_health_reporter_ops {
 	int (*diagnose)(struct devlink_health_reporter *reporter,
 			struct devlink_fmsg *fmsg,
 			struct netlink_ext_ack *extack);
+	int (*test)(struct devlink_health_reporter *reporter,
+		    struct netlink_ext_ack *extack);
 };
 
 /**
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 40d35145c879..631f5bdf1707 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -122,6 +122,8 @@ enum devlink_command {
 	DEVLINK_CMD_TRAP_POLICER_NEW,
 	DEVLINK_CMD_TRAP_POLICER_DEL,
 
+	DEVLINK_CMD_HEALTH_REPORTER_TEST,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
-- 
cgit v1.2.3


From 7d61588f690def55ba2885f7f4b03d13ff45b163 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 15 Sep 2020 14:40:59 +0300
Subject: nexthop: Remove unused function declaration from header file

Not used or implemented anywhere.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 3a4f9e3b91a5..2e44efe5709b 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -109,9 +109,6 @@ enum nexthop_event_type {
 	NEXTHOP_EVENT_DEL
 };
 
-int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
-			  enum nexthop_event_type event_type,
-			  struct nexthop *nh);
 int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
 
-- 
cgit v1.2.3


From 52f7232a790a36da30eb64c6de6067a9e4ad194c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 15 Sep 2020 14:41:00 +0300
Subject: nexthop: Remove NEXTHOP_EVENT_ADD

Not used anywhere.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 2e44efe5709b..2fd76a9b6dc8 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -105,7 +105,6 @@ struct nexthop {
 };
 
 enum nexthop_event_type {
-	NEXTHOP_EVENT_ADD,
 	NEXTHOP_EVENT_DEL
 };
 
-- 
cgit v1.2.3


From 80690ec6b595807db9a52ec5b225a2d88033ddb5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 15 Sep 2020 14:41:01 +0300
Subject: nexthop: Convert to blocking notification chain

Currently, the only listener of the nexthop notification chain is the
VXLAN driver. Subsequent patches will add more listeners (e.g., device
drivers such as netdevsim) that need to be able to block when processing
notifications.

Therefore, convert the notification chain to a blocking one. This is
safe as notifications are always emitted from process context.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/nexthop.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h
index 1937476c94a0..1849e77eb68a 100644
--- a/include/net/netns/nexthop.h
+++ b/include/net/netns/nexthop.h
@@ -14,6 +14,6 @@ struct netns_nexthop {
 
 	unsigned int		seq;		/* protected by rtnl_mutex */
 	u32			last_id_allocated;
-	struct atomic_notifier_head notifier_chain;
+	struct blocking_notifier_head notifier_chain;
 };
 #endif
-- 
cgit v1.2.3


From 984fe94f94756dacb3c8cc52904a23adf9e04da1 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Tue, 15 Sep 2020 16:45:39 -0700
Subject: bpf: Mutex protect used_maps array and count

To support modifying the used_maps array, we use a mutex to protect
the use of the counter and the array. The mutex is initialized right
after the prog aux is allocated, and destroyed right before prog
aux is freed. This way we guarantee it's initialized for both cBPF
and eBPF.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Cc: YiFei Zhu <zhuyifei1999@gmail.com>
Link: https://lore.kernel.org/bpf/20200915234543.3220146-2-sdf@google.com
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c6d9f2c444f4..5dcce0364634 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -751,6 +751,7 @@ struct bpf_prog_aux {
 	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
 	struct bpf_map **used_maps;
+	struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
-- 
cgit v1.2.3


From ef15314aa5de955c6afd87d512e8b00f5ac08d06 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Tue, 15 Sep 2020 16:45:40 -0700
Subject: bpf: Add BPF_PROG_BIND_MAP syscall

This syscall binds a map to a program. Returns success if the map is
already bound to the program.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Cc: YiFei Zhu <zhuyifei1999@gmail.com>
Link: https://lore.kernel.org/bpf/20200915234543.3220146-3-sdf@google.com
---
 include/uapi/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7dd314176df7..a22812561064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
 	BPF_ENABLE_STATS,
 	BPF_ITER_CREATE,
 	BPF_LINK_DETACH,
+	BPF_PROG_BIND_MAP,
 };
 
 enum bpf_map_type {
@@ -658,6 +659,12 @@ union bpf_attr {
 		__u32		flags;
 	} iter_create;
 
+	struct { /* struct used by BPF_PROG_BIND_MAP command */
+		__u32		prog_fd;
+		__u32		map_fd;
+		__u32		flags;		/* extra flags */
+	} prog_bind_map;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
-- 
cgit v1.2.3


From ba3a86e47232ad9f76160929f33ac9c64e4d0567 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 14 Sep 2020 15:44:37 -0700
Subject: rcu-tasks: Fix grace-period/unlock race in RCU Tasks Trace

The more intense grace-period processing resulting from the 50x RCU
Tasks Trace grace-period speedups exposed the following race condition:

o	Task A running on CPU 0 executes rcu_read_lock_trace(),
	entering a read-side critical section.

o	When Task A eventually invokes rcu_read_unlock_trace()
	to exit its read-side critical section, this function
	notes that the ->trc_reader_special.s flag is zero and
	and therefore invoke wil set ->trc_reader_nesting to zero
	using WRITE_ONCE().  But before that happens...

o	The RCU Tasks Trace grace-period kthread running on some other
	CPU interrogates Task A, but this fails because this task is
	currently running.  This kthread therefore sends an IPI to CPU 0.

o	CPU 0 receives the IPI, and thus invokes trc_read_check_handler().
	Because Task A has not yet cleared its ->trc_reader_nesting
	counter, this function sees that Task A is still within its
	read-side critical section.  This function therefore sets the
	->trc_reader_nesting.b.need_qs flag, AKA the .need_qs flag.

	Except that Task A has already checked the .need_qs flag, which
	is part of the ->trc_reader_special.s flag.  The .need_qs flag
	therefore remains set until Task A's next rcu_read_unlock_trace().

o	Task A now invokes synchronize_rcu_tasks_trace(), which cannot
	start a new grace period until the current grace period completes.
	And thus cannot return until after that time.

	But Task A's .need_qs flag is still set, which prevents the current
	grace period from completing.  And because Task A is blocked, it
	will never execute rcu_read_unlock_trace() until its call to
	synchronize_rcu_tasks_trace() returns.

	We are therefore deadlocked.

This race is improbable, but 80 hours of rcutorture made it happen twice.
The race was possible before the grace-period speedup, but roughly 50x
less probable.  Several thousand hours of rcutorture would have been
necessary to have a reasonable chance of making this happen before this
50x speedup.

This commit therefore eliminates this deadlock by setting
->trc_reader_nesting to a large negative number before checking the
.need_qs and zeroing (or decrementing with respect to its initial
value) ->trc_reader_nesting.  For its part, the IPI handler's
trc_read_check_handler() function adds a check for negative values,
deferring evaluation of the task in this case.  Taken together, these
changes avoid this deadlock scenario.

Fixes: 276c410448db ("rcu-tasks: Split ->trc_reader_need_end")
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: <bpf@vger.kernel.org>
Cc: <stable@vger.kernel.org> # 5.7.x
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate_trace.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index d9015aac78c6..a6a6a3acab5a 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -50,6 +50,7 @@ static inline void rcu_read_lock_trace(void)
 	struct task_struct *t = current;
 
 	WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
+	barrier();
 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
 	    t->trc_reader_special.b.need_mb)
 		smp_mb(); // Pairs with update-side barriers
@@ -72,6 +73,9 @@ static inline void rcu_read_unlock_trace(void)
 
 	rcu_lock_release(&rcu_trace_lock_map);
 	nesting = READ_ONCE(t->trc_reader_nesting) - 1;
+	barrier(); // Critical section before disabling.
+	// Disable IPI-based setting of .need_qs.
+	WRITE_ONCE(t->trc_reader_nesting, INT_MIN);
 	if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
 		WRITE_ONCE(t->trc_reader_nesting, nesting);
 		return;  // We assume shallow reader nesting.
-- 
cgit v1.2.3


From a748c6975dea325da540610c2ba9b5f332c603e6 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 16 Sep 2020 23:10:05 +0200
Subject: bpf: propagate poke descriptors to subprograms

Previously, there was no need for poke descriptors being present in
subprogram's bpf_prog_aux struct since tailcalls were simply not allowed
in them. Each subprog is JITed independently so in order to enable
JITing subprograms that use tailcalls, do the following:

- in fixup_bpf_calls() store the index of tailcall insn onto the generated
  poke descriptor,
- in case when insn patching occurs, adjust the tailcall insn idx from
  bpf_patch_insn_data,
- then in jit_subprogs() check whether the given poke descriptor belongs
  to the current subprog by checking if that previously stored absolute
  index of tail call insn is in the scope of the insns of given subprog,
- update the insn->imm with new poke descriptor slot so that while JITing
  the proper poke descriptor will be grabbed

This way each of the main program's poke descriptors are distributed
across the subprograms poke descriptor array, so main program's
descriptors can be untracked out of the prog array map.

Add also subprog's aux struct to the BPF map poke_progs list by calling
on it map_poke_track().

In case of any error, call the map_poke_untrack() on subprog's aux
structs that have already been registered to prog array map.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5dcce0364634..a23e5eb728c8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -707,6 +707,7 @@ struct bpf_jit_poke_descriptor {
 	bool ip_stable;
 	u8 adj_off;
 	u16 reason;
+	u32 insn_idx;
 };
 
 /* reg_type info for ctx arguments */
-- 
cgit v1.2.3


From cf71b174d3464c7dc22f86f25d629a8d9d5c3519 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 16 Sep 2020 23:10:06 +0200
Subject: bpf: rename poke descriptor's 'ip' member to 'tailcall_target'

Reflect the actual purpose of poke->ip and rename it to
poke->tailcall_target so that it will not the be confused with another
poke target that will be introduced in next commit.

While at it, do the same thing with poke->ip_stable - rename it to
poke->tailcall_target_stable.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a23e5eb728c8..f3790c9cf542 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -697,14 +697,14 @@ enum bpf_jit_poke_reason {
 
 /* Descriptor of pokes pointing /into/ the JITed image. */
 struct bpf_jit_poke_descriptor {
-	void *ip;
+	void *tailcall_target;
 	union {
 		struct {
 			struct bpf_map *map;
 			u32 key;
 		} tail_call;
 	};
-	bool ip_stable;
+	bool tailcall_target_stable;
 	u8 adj_off;
 	u16 reason;
 	u32 insn_idx;
-- 
cgit v1.2.3


From 2b7ea122a0c437442bf54a5f1c60155757df270c Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 16 Sep 2020 22:16:29 +0800
Subject: net/sched: Remove unused function qdisc_queue_drop_head()

It is not used since commit a09ceb0e0814 ("sched: remove qdisc->drop")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d60e7c39d60c..6c762457122f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1047,12 +1047,6 @@ static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
 	return 0;
 }
 
-static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
-						 struct sk_buff **to_free)
-{
-	return __qdisc_queue_drop_head(sch, &sch->q, to_free);
-}
-
 static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
 {
 	const struct qdisc_skb_head *qh = &sch->q;
-- 
cgit v1.2.3


From 5114b331051981ecbdf144b5ad33387ae8d0f0d5 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 16 Sep 2020 22:17:28 +0800
Subject: genetlink: Remove unused function genl_err_attr()

It is never used, so can remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 6e5f1e1aa822..b9eb92f3fe86 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -101,14 +101,6 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
 
 #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
 
-static inline int genl_err_attr(struct genl_info *info, int err,
-				const struct nlattr *attr)
-{
-	info->extack->bad_attr = attr;
-
-	return err;
-}
-
 enum genl_validate_flags {
 	GENL_DONT_VALIDATE_STRICT		= BIT(0),
 	GENL_DONT_VALIDATE_DUMP			= BIT(1),
-- 
cgit v1.2.3


From 2492c205d2bbbc01f5c9e49fffe4b2e633c33f38 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 17 Sep 2020 10:19:10 +0800
Subject: netdev: Remove unused functions

There is no callers in tree, so can remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 157e0242e9ee..909b1fbb0481 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4677,16 +4677,6 @@ int netdev_class_create_file_ns(const struct class_attribute *class_attr,
 void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
 				 const void *ns);
 
-static inline int netdev_class_create_file(const struct class_attribute *class_attr)
-{
-	return netdev_class_create_file_ns(class_attr, NULL);
-}
-
-static inline void netdev_class_remove_file(const struct class_attribute *class_attr)
-{
-	netdev_class_remove_file_ns(class_attr, NULL);
-}
-
 extern const struct kobj_ns_type_operations net_ns_type_operations;
 
 const char *netdev_drivername(const struct net_device *dev);
-- 
cgit v1.2.3


From 78a3ea5557137b0811f3c5a020afaafa7b61d6aa Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 17 Sep 2020 10:51:32 -0700
Subject: net: remove comments on struct rtnl_link_stats

We removed the misleading comments from struct rtnl_link_stats64
when we added proper kdoc. struct rtnl_link_stats has the same
inline comments, so remove them, too.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bf4667403cab..c4b23f06f69e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -7,24 +7,23 @@
 
 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
+	__u32	rx_packets;
+	__u32	tx_packets;
+	__u32	rx_bytes;
+	__u32	tx_bytes;
+	__u32	rx_errors;
+	__u32	tx_errors;
+	__u32	rx_dropped;
+	__u32	tx_dropped;
+	__u32	multicast;
 	__u32	collisions;
-
 	/* detailed rx_errors: */
 	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
+	__u32	rx_over_errors;
+	__u32	rx_crc_errors;
+	__u32	rx_frame_errors;
+	__u32	rx_fifo_errors;
+	__u32	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u32	tx_aborted_errors;
@@ -37,7 +36,7 @@ struct rtnl_link_stats {
 	__u32	rx_compressed;
 	__u32	tx_compressed;
 
-	__u32	rx_nohandler;		/* dropped, no handler found	*/
+	__u32	rx_nohandler;
 };
 
 /**
-- 
cgit v1.2.3


From 7f6e4312e15a5c370e84eaa685879b6bdcc717e4 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 16 Sep 2020 23:10:07 +0200
Subject: bpf: Limit caller's stack depth 256 for subprogs with tailcalls

Protect against potential stack overflow that might happen when bpf2bpf
calls get combined with tailcalls. Limit the caller's stack depth for
such case down to 256 so that the worst case scenario would result in 8k
stack size (32 which is tailcall limit * 256 = 8k).

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 53c7bd568c5d..5026b75db972 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -358,6 +358,7 @@ struct bpf_subprog_info {
 	u32 start; /* insn idx of function entry point */
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u16 stack_depth; /* max. stack depth used by this function */
+	bool has_tail_call;
 };
 
 /* single container for all structs
-- 
cgit v1.2.3


From ebf7d1f508a73871acf3b2bfbfa1323a477acdb3 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 16 Sep 2020 23:10:08 +0200
Subject: bpf, x64: rework pro/epilogue and tailcall handling in JIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit serves two things:
1) it optimizes BPF prologue/epilogue generation
2) it makes possible to have tailcalls within BPF subprogram

Both points are related to each other since without 1), 2) could not be
achieved.

In [1], Alexei says:
"The prologue will look like:
nop5
xor eax,eax  // two new bytes if bpf_tail_call() is used in this
             // function
push rbp
mov rbp, rsp
sub rsp, rounded_stack_depth
push rax // zero init tail_call counter
variable number of push rbx,r13,r14,r15

Then bpf_tail_call will pop variable number rbx,..
and final 'pop rax'
Then 'add rsp, size_of_current_stack_frame'
jmp to next function and skip over 'nop5; xor eax,eax; push rpb; mov
rbp, rsp'

This way new function will set its own stack size and will init tail
call
counter with whatever value the parent had.

If next function doesn't use bpf_tail_call it won't have 'xor eax,eax'.
Instead it would need to have 'nop2' in there."

Implement that suggestion.

Since the layout of stack is changed, tail call counter handling can not
rely anymore on popping it to rbx just like it have been handled for
constant prologue case and later overwrite of rbx with actual value of
rbx pushed to stack. Therefore, let's use one of the register (%rcx) that
is considered to be volatile/caller-saved and pop the value of tail call
counter in there in the epilogue.

Drop the BUILD_BUG_ON in emit_prologue and in
emit_bpf_tail_call_indirect where instruction layout is not constant
anymore.

Introduce new poke target, 'tailcall_bypass' to poke descriptor that is
dedicated for skipping the register pops and stack unwind that are
generated right before the actual jump to target program.
For case when the target program is not present, BPF program will skip
the pop instructions and nop5 dedicated for jmpq $target. An example of
such state when only R6 of callee saved registers is used by program:

ffffffffc0513aa1:       e9 0e 00 00 00          jmpq   0xffffffffc0513ab4
ffffffffc0513aa6:       5b                      pop    %rbx
ffffffffc0513aa7:       58                      pop    %rax
ffffffffc0513aa8:       48 81 c4 00 00 00 00    add    $0x0,%rsp
ffffffffc0513aaf:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
ffffffffc0513ab4:       48 89 df                mov    %rbx,%rdi

When target program is inserted, the jump that was there to skip
pops/nop5 will become the nop5, so CPU will go over pops and do the
actual tailcall.

One might ask why there simply can not be pushes after the nop5?
In the following example snippet:

ffffffffc037030c:       48 89 fb                mov    %rdi,%rbx
(...)
ffffffffc0370332:       5b                      pop    %rbx
ffffffffc0370333:       58                      pop    %rax
ffffffffc0370334:       48 81 c4 00 00 00 00    add    $0x0,%rsp
ffffffffc037033b:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
ffffffffc0370340:       48 81 ec 00 00 00 00    sub    $0x0,%rsp
ffffffffc0370347:       50                      push   %rax
ffffffffc0370348:       53                      push   %rbx
ffffffffc0370349:       48 89 df                mov    %rbx,%rdi
ffffffffc037034c:       e8 f7 21 00 00          callq  0xffffffffc0372548

There is the bpf2bpf call (at ffffffffc037034c) right after the tailcall
and jump target is not present. ctx is in %rbx register and BPF
subprogram that we will call into on ffffffffc037034c is relying on it,
e.g. it will pick ctx from there. Such code layout is therefore broken
as we would overwrite the content of %rbx with the value that was pushed
on the prologue. That is the reason for the 'bypass' approach.

Special care needs to be taken during the install/update/remove of
tailcall target. In case when target program is not present, the CPU
must not execute the pop instructions that precede the tailcall.

To address that, the following states can be defined:
A nop, unwind, nop
B nop, unwind, tail
C skip, unwind, nop
D skip, unwind, tail

A is forbidden (lead to incorrectness). The state transitions between
tailcall install/update/remove will work as follows:

First install tail call f: C->D->B(f)
 * poke the tailcall, after that get rid of the skip
Update tail call f to f': B(f)->B(f')
 * poke the tailcall (poke->tailcall_target) and do NOT touch the
   poke->tailcall_bypass
Remove tail call: B(f')->C(f')
 * poke->tailcall_bypass is poked back to jump, then we wait the RCU
   grace period so that other programs will finish its execution and
   after that we are safe to remove the poke->tailcall_target
Install new tail call (f''): C(f')->D(f'')->B(f'').
 * same as first step

This way CPU can never be exposed to "unwind, tail" state.

Last but not least, when tailcalls get mixed with bpf2bpf calls, it
would be possible to encounter the endless loop due to clearing the
tailcall counter if for example we would use the tailcall3-like from BPF
selftests program that would be subprogram-based, meaning the tailcall
would be present within the BPF subprogram.

This test, broken down to particular steps, would do:
entry -> set tailcall counter to 0, bump it by 1, tailcall to func0
func0 -> call subprog_tail
(we are NOT skipping the first 11 bytes of prologue and this subprogram
has a tailcall, therefore we clear the counter...)
subprog -> do the same thing as entry

and then loop forever.

To address this, the idea is to go through the call chain of bpf2bpf progs
and look for a tailcall presence throughout whole chain. If we saw a single
tail call then each node in this call chain needs to be marked as a subprog
that can reach the tailcall. We would later feed the JIT with this info
and:
- set eax to 0 only when tailcall is reachable and this is the entry prog
- if tailcall is reachable but there's no tailcall in insns of currently
  JITed prog then push rax anyway, so that it will be possible to
  propagate further down the call chain
- finally if tailcall is reachable, then we need to precede the 'call'
  insn with mov rax, [rbp - (stack_depth + 8)]

Tail call related cases from test_verifier kselftest are also working
fine. Sample BPF programs that utilize tail calls (sockex3, tracex5)
work properly as well.

[1]: https://lore.kernel.org/bpf/20200517043227.2gpq22ifoq37ogst@ast-mbp.dhcp.thefacebook.com/

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          | 3 +++
 include/linux/bpf_verifier.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f3790c9cf542..d7c5a6ed87e3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -698,6 +698,8 @@ enum bpf_jit_poke_reason {
 /* Descriptor of pokes pointing /into/ the JITed image. */
 struct bpf_jit_poke_descriptor {
 	void *tailcall_target;
+	void *tailcall_bypass;
+	void *bypass_addr;
 	union {
 		struct {
 			struct bpf_map *map;
@@ -738,6 +740,7 @@ struct bpf_prog_aux {
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
 	bool sleepable;
+	bool tail_call_reachable;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5026b75db972..fbc964526ba3 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -359,6 +359,7 @@ struct bpf_subprog_info {
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u16 stack_depth; /* max. stack depth used by this function */
 	bool has_tail_call;
+	bool tail_call_reachable;
 };
 
 /* single container for all structs
-- 
cgit v1.2.3


From 09b28d76eac48e922dc293da1aa2b2b85c32aeee Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 17 Sep 2020 19:09:18 -0700
Subject: bpf: Add abnormal return checks.

LD_[ABS|IND] instructions may return from the function early. bpf_tail_call
pseudo instruction is either fallthrough or return. Allow them in the
subprograms only when subprograms are BTF annotated and have scalar return
types. Allow ld_abs and tail_call in the main program even if it calls into
subprograms. In the past that was not ok to do for ld_abs, since it was JITed
with special exit sequence. Since bpf_gen_ld_abs() was introduced the ld_abs
looks like normal exit insn from JIT point of view, so it's safe to allow them
in the main program.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index fbc964526ba3..2bb48a2c4d08 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -360,6 +360,7 @@ struct bpf_subprog_info {
 	u16 stack_depth; /* max. stack depth used by this function */
 	bool has_tail_call;
 	bool tail_call_reachable;
+	bool has_ld_abs;
 };
 
 /* single container for all structs
-- 
cgit v1.2.3


From 9d6e371dda7f3294e1b7d2a00d8e77a042b42988 Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Tue, 8 Sep 2020 01:01:57 -0500
Subject: cfg80211: add more comments for ap_isolate in bss_parameters

The value of struct bss_parameters::ap_isolate will be -1, 0 or 1.
The value -1 means not to change. To prevent developers from thinking
ap_isolate is only 0 or 1, I add more comments on it.

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Reviewed-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200908060157.98846-1-wright.feng@cypress.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c9bce9bba511..7ad530912b21 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1787,6 +1787,7 @@ struct mpath_info {
  *	(or NULL for no change)
  * @basic_rates_len: number of basic rates
  * @ap_isolate: do not forward packets between connected stations
+ *	(0 = no, 1 = yes, -1 = do not change)
  * @ht_opmode: HT Operation mode
  *	(u16 = opmode, -1 = do not change)
  * @p2p_ctwindow: P2P CT Window (-1 = no change)
-- 
cgit v1.2.3


From 6aea26ce5a4cf854c1a86f3760753b5e2617578f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 8 Sep 2020 14:36:53 +0200
Subject: mac80211: rework tx encapsulation offload API

The current API (which lets the driver turn on/off per vif directly) has a
number of limitations:
- it does not deal with AP_VLAN
- conditions for enabling (no tkip, no monitor) are only checked at
  add_interface time
- no way to indicate 4-addr support

In order to address this, store offload flags in struct ieee80211_vif
(easy to extend for decap offload later). mac80211 initially sets the enable
flag, but gives the driver a chance to modify it before its settings are
applied. In addition to the .add_interface op, a .update_vif_offload op is
introduced, which can be used for runtime changes.

If a driver can't disable encap offload at runtime, or if it has some extra
limitations, it can simply override the flags within those ops.

Support for encap offload with 4-address mode interfaces can be enabled
by setting a flag from .add_interface or .update_vif_offload.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200908123702.88454-6-nbd@nbd.name
[resolved conflict with commit aa2092a9bab3 ("ath11k: add raw mode and
software crypto support")]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ec148b3e9c41..01612a82aacf 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1606,6 +1606,21 @@ enum ieee80211_vif_flags {
 	IEEE80211_VIF_GET_NOA_UPDATE		= BIT(3),
 };
 
+
+/**
+ * enum ieee80211_offload_flags - virtual interface offload flags
+ *
+ * @IEEE80211_OFFLOAD_ENCAP_ENABLED: tx encapsulation offload is enabled
+ *	The driver supports sending frames passed as 802.3 frames by mac80211.
+ *	It must also support sending 802.11 packets for the same interface.
+ * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload
+ */
+
+enum ieee80211_offload_flags {
+	IEEE80211_OFFLOAD_ENCAP_ENABLED		= BIT(0),
+	IEEE80211_OFFLOAD_ENCAP_4ADDR		= BIT(1),
+};
+
 /**
  * struct ieee80211_vif - per-interface data
  *
@@ -1626,6 +1641,11 @@ enum ieee80211_vif_flags {
  *	these need to be set (or cleared) when the interface is added
  *	or, if supported by the driver, the interface type is changed
  *	at runtime, mac80211 will never touch this field
+ * @offloaad_flags: hardware offload capabilities/flags for this interface.
+ *	These are initialized by mac80211 before calling .add_interface,
+ *	.change_interface or .update_vif_offload and updated by the driver
+ *	within these ops, based on supported features or runtime change
+ *	restrictions.
  * @hw_queue: hardware queue for each AC
  * @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only
  * @chanctx_conf: The channel context this interface is assigned to, or %NULL
@@ -1662,6 +1682,7 @@ struct ieee80211_vif {
 	struct ieee80211_chanctx_conf __rcu *chanctx_conf;
 
 	u32 driver_flags;
+	u32 offload_flags;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct dentry *debugfs_dir;
@@ -2328,6 +2349,9 @@ struct ieee80211_txq {
  *	aggregating MPDUs with the same keyid, allowing mac80211 to keep Tx
  *	A-MPDU sessions active while rekeying with Extended Key ID.
  *
+ * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation
+ *	offload
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2380,6 +2404,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_MULTI_BSSID,
 	IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
 	IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
+	IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
@@ -3814,6 +3839,8 @@ enum ieee80211_reconfig_type {
  * @set_tid_config: Apply TID specific configurations. This callback may sleep.
  * @reset_tid_config: Reset TID specific configuration for the peer.
  *	This callback may sleep.
+ * @update_vif_config: Update virtual interface offload flags
+ *	This callback may sleep.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4125,6 +4152,8 @@ struct ieee80211_ops {
 	int (*reset_tid_config)(struct ieee80211_hw *hw,
 				struct ieee80211_vif *vif,
 				struct ieee80211_sta *sta, u8 tids);
+	void (*update_vif_offload)(struct ieee80211_hw *hw,
+				   struct ieee80211_vif *vif);
 };
 
 /**
-- 
cgit v1.2.3


From c74114d7d51521bd785bf6aa0f90ee87d99bee8a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 8 Sep 2020 14:36:55 +0200
Subject: mac80211: remove tx status call to ieee80211_sta_register_airtime

All drivers using airtime fairness are calling ieee80211_sta_register_airtime
directly, now they must. Document this as well.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200908123702.88454-8-nbd@nbd.name
[johannes: update the documentation to suit]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 01612a82aacf..ca270f7d82b9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1002,7 +1002,8 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
  * @status.ampdu_ack_len: AMPDU ack length
  * @status.ampdu_len: AMPDU length
  * @status.antenna: (legacy, kept only for iwlegacy)
- * @status.tx_time: airtime consumed for transmission
+ * @status.tx_time: airtime consumed for transmission; note this is only
+ *	used for WMM AC, not for airtime fairness
  * @status.is_valid_ack_signal: ACK signal is valid
  * @status.status_driver_data: driver use area
  * @ack: union part for pure ACK data
@@ -5676,7 +5677,7 @@ void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid);
 /**
  * ieee80211_sta_register_airtime - register airtime usage for a sta/tid
  *
- * Register airtime usage for a given sta on a given tid. The driver can call
+ * Register airtime usage for a given sta on a given tid. The driver must call
  * this function to notify mac80211 that a station used a certain amount of
  * airtime. This information will be used by the TXQ scheduler to schedule
  * stations in a way that ensures airtime fairness.
-- 
cgit v1.2.3


From cc20ff2c6b5d3e28747c6d30ecd097ea1a4d2502 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 8 Sep 2020 14:36:57 +0200
Subject: mac80211: swap NEED_TXPROCESSING and HW_80211_ENCAP tx flags

In order to unify the tx status path, the hw 802.11 encapsulation flag
needs to survive the trip to the tx status call.
Since we don't have any free bits in info->flags, we need to move one.
IEEE80211_TX_INTFL_NEED_TXPROCESSING is only used internally in mac80211,
and only before the call into the driver.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200908123702.88454-10-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ca270f7d82b9..3a9ab3c10050 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -720,9 +720,8 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_INTFL_OFFCHAN_TX_OK: Internal to mac80211. Used to indicate
  *	that a frame can be transmitted while the queues are stopped for
  *	off-channel operation.
- * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211,
- *	used to indicate that a pending frame requires TX processing before
- *	it can be sent out.
+ * @IEEE80211_TX_CTL_HW_80211_ENCAP: This frame uses hardware encapsulation
+ *	(header conversion)
  * @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211,
  *	used to indicate that a frame was already retried due to PS
  * @IEEE80211_TX_INTFL_DONT_ENCRYPT: completely internal to mac80211,
@@ -791,7 +790,7 @@ enum mac80211_tx_info_flags {
 	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(11),
 	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(12),
 	IEEE80211_TX_INTFL_OFFCHAN_TX_OK	= BIT(13),
-	IEEE80211_TX_INTFL_NEED_TXPROCESSING	= BIT(14),
+	IEEE80211_TX_CTL_HW_80211_ENCAP		= BIT(14),
 	IEEE80211_TX_INTFL_RETRIED		= BIT(15),
 	IEEE80211_TX_INTFL_DONT_ENCRYPT		= BIT(16),
 	IEEE80211_TX_CTL_NO_PS_BUFFER		= BIT(17),
@@ -823,8 +822,9 @@ enum mac80211_tx_info_flags {
  * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
  * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
  * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup
- * @IEEE80211_TX_CTRL_HW_80211_ENCAP: This frame uses hardware encapsulation
- *	(header conversion)
+ * @IEEE80211_TX_INTCFL_NEED_TXPROCESSING: completely internal to mac80211,
+ *	used to indicate that a pending frame requires TX processing before
+ *	it can be sent out.
  * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that
  *	has already been assigned to this frame.
  *
@@ -837,7 +837,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTRL_AMSDU			= BIT(3),
 	IEEE80211_TX_CTRL_FAST_XMIT		= BIT(4),
 	IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP	= BIT(5),
-	IEEE80211_TX_CTRL_HW_80211_ENCAP	= BIT(6),
+	IEEE80211_TX_INTCFL_NEED_TXPROCESSING	= BIT(6),
 	IEEE80211_TX_CTRL_NO_SEQNO		= BIT(7),
 };
 
-- 
cgit v1.2.3


From 1ff4e8f2dec8b145b451f05320e4f9e01d254ae2 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 8 Sep 2020 14:37:01 +0200
Subject: mac80211: notify the driver when a sta uses 4-address mode

This is needed for encapsulation offload of 4-address mode packets

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200908123702.88454-14-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a9ab3c10050..07c4dd7ab55f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3842,6 +3842,8 @@ enum ieee80211_reconfig_type {
  *	This callback may sleep.
  * @update_vif_config: Update virtual interface offload flags
  *	This callback may sleep.
+ * @sta_set_4addr: Called to notify the driver when a station starts/stops using
+ *	4-address mode
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4155,6 +4157,8 @@ struct ieee80211_ops {
 				struct ieee80211_sta *sta, u8 tids);
 	void (*update_vif_offload)(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif);
+	void (*sta_set_4addr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			      struct ieee80211_sta *sta, bool enabled);
 };
 
 /**
-- 
cgit v1.2.3


From f02dff93e26bef46f5511f1e8229061bd23c3074 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 8 Sep 2020 14:37:00 +0200
Subject: mac80211: extend ieee80211_tx_status_ext to support bulk free

Store processed skbs ready to be freed in a list so the driver bulk free them

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200908123702.88454-13-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 07c4dd7ab55f..d3c43420779c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1096,12 +1096,14 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info)
  * @info: Basic tx status information
  * @skb: Packet skb (can be NULL if not provided by the driver)
  * @rate: The TX rate that was used when sending the packet
+ * @free_list: list where processed skbs are stored to be free'd by the driver
  */
 struct ieee80211_tx_status {
 	struct ieee80211_sta *sta;
 	struct ieee80211_tx_info *info;
 	struct sk_buff *skb;
 	struct rate_info *rate;
+	struct list_head *free_list;
 };
 
 /**
-- 
cgit v1.2.3


From 37050e3ab0b3f02819e3d70ab01d97addb810b28 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Tue, 8 Sep 2020 12:03:02 -0700
Subject: ieee80211: redefine S1G bits with GENMASK

The S1G capability fields were defined by ORing BITS()
together, and expecting a custom macro to use the _SHIFT
definitions. Use the Linux kernel GENMASK for the
definitions now, and FIELD_{GET,PREP} to access the fields
in the future.

Take the chance to rename eg. S1G_CAPAB_B0 to the more
compact S1G_CAP0.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200908190323.15814-2-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 156 +++++++++++++++++++++++-----------------------
 1 file changed, 78 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c47f43e65a2f..53fba39d4ba6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2330,84 +2330,84 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
 }
 
 /* S1G Capabilities Information field */
-#define S1G_CAPAB_B0_S1G_LONG BIT(0)
-#define S1G_CAPAB_B0_SGI_1MHZ BIT(1)
-#define S1G_CAPAB_B0_SGI_2MHZ BIT(2)
-#define S1G_CAPAB_B0_SGI_4MHZ BIT(3)
-#define S1G_CAPAB_B0_SGI_8MHZ BIT(4)
-#define S1G_CAPAB_B0_SGI_16MHZ BIT(5)
-#define S1G_CAPAB_B0_SUPP_CH_WIDTH_MASK (BIT(6) | BIT(7))
-#define S1G_CAPAB_B0_SUPP_CH_WIDTH_SHIFT 6
-
-#define S1G_CAPAB_B1_RX_LDPC BIT(0)
-#define S1G_CAPAB_B1_TX_STBC BIT(1)
-#define S1G_CAPAB_B1_RX_STBC BIT(2)
-#define S1G_CAPAB_B1_SU_BFER BIT(3)
-#define S1G_CAPAB_B1_SU_BFEE BIT(4)
-#define S1G_CAPAB_B1_BFEE_STS_MASK (BIT(5) | BIT(6) | BIT(7))
-#define S1G_CAPAB_B1_BFEE_STS_SHIFT 5
-
-#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_MASK (BIT(0) | BIT(1) | BIT(2))
-#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_SHIFT 0
-#define S1G_CAPAB_B2_MU_BFER BIT(3)
-#define S1G_CAPAB_B2_MU_BFEE BIT(4)
-#define S1G_CAPAB_B2_PLUS_HTC_VHT BIT(5)
-#define S1G_CAPAB_B2_TRAVELING_PILOT_MASK (BIT(6) | BIT(7))
-#define S1G_CAPAB_B2_TRAVELING_PILOT_SHIFT 6
-
-#define S1G_CAPAB_B3_RD_RESPONDER BIT(0)
-#define S1G_CAPAB_B3_HT_DELAYED_BA BIT(1)
-#define S1G_CAPAB_B3_MAX_MPDU_LEN BIT(2)
-#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_MASK (BIT(3) | BIT(4))
-#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_SHIFT 3
-#define S1G_CAPAB_B3_MIN_MPDU_START_MASK (BIT(5) | BIT(6) | BIT(7))
-#define S1G_CAPAB_B3_MIN_MPDU_START_SHIFT 5
-
-#define S1G_CAPAB_B4_UPLINK_SYNC BIT(0)
-#define S1G_CAPAB_B4_DYNAMIC_AID BIT(1)
-#define S1G_CAPAB_B4_BAT BIT(2)
-#define S1G_CAPAB_B4_TIME_ADE BIT(3)
-#define S1G_CAPAB_B4_NON_TIM BIT(4)
-#define S1G_CAPAB_B4_GROUP_AID BIT(5)
-#define S1G_CAPAB_B4_STA_TYPE_MASK (BIT(6) | BIT(7))
-#define S1G_CAPAB_B4_STA_TYPE_SHIFT 6
-
-#define S1G_CAPAB_B5_CENT_AUTH_CONTROL BIT(0)
-#define S1G_CAPAB_B5_DIST_AUTH_CONTROL BIT(1)
-#define S1G_CAPAB_B5_AMSDU BIT(2)
-#define S1G_CAPAB_B5_AMPDU BIT(3)
-#define S1G_CAPAB_B5_ASYMMETRIC_BA BIT(4)
-#define S1G_CAPAB_B5_FLOW_CONTROL BIT(5)
-#define S1G_CAPAB_B5_SECTORIZED_BEAM_MASK (BIT(6) | BIT(7))
-#define S1G_CAPAB_B5_SECTORIZED_BEAM_SHIFT 6
-
-#define S1G_CAPAB_B6_OBSS_MITIGATION BIT(0)
-#define S1G_CAPAB_B6_FRAGMENT_BA BIT(1)
-#define S1G_CAPAB_B6_NDP_PS_POLL BIT(2)
-#define S1G_CAPAB_B6_RAW_OPERATION BIT(3)
-#define S1G_CAPAB_B6_PAGE_SLICING BIT(4)
-#define S1G_CAPAB_B6_TXOP_SHARING_IMP_ACK BIT(5)
-#define S1G_CAPAB_B6_VHT_LINK_ADAPT_MASK (BIT(6) | BIT(7))
-#define S1G_CAPAB_B6_VHT_LINK_ADAPT_SHIFT 6
-
-#define S1G_CAPAB_B7_TACK_AS_PS_POLL BIT(0)
-#define S1G_CAPAB_B7_DUP_1MHZ BIT(1)
-#define S1G_CAPAB_B7_MCS_NEGOTIATION BIT(2)
-#define S1G_CAPAB_B7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3)
-#define S1G_CAPAB_B7_NDP_BFING_REPORT_POLL BIT(4)
-#define S1G_CAPAB_B7_UNSOLICITED_DYN_AID BIT(5)
-#define S1G_CAPAB_B7_SECTOR_TRAINING_OPERATION BIT(6)
-#define S1G_CAPAB_B7_TEMP_PS_MODE_SWITCH BIT(7)
-
-#define S1G_CAPAB_B8_TWT_GROUPING BIT(0)
-#define S1G_CAPAB_B8_BDT BIT(1)
-#define S1G_CAPAB_B8_COLOR_MASK (BIT(2) | BIT(3) | BIT(4))
-#define S1G_CAPAB_B8_COLOR_SHIFT 2
-#define S1G_CAPAB_B8_TWT_REQUEST BIT(5)
-#define S1G_CAPAB_B8_TWT_RESPOND BIT(6)
-#define S1G_CAPAB_B8_PV1_FRAME BIT(7)
-
-#define S1G_CAPAB_B9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0)
+#define S1G_CAP0_S1G_LONG	BIT(0)
+#define S1G_CAP0_SGI_1MHZ	BIT(1)
+#define S1G_CAP0_SGI_2MHZ	BIT(2)
+#define S1G_CAP0_SGI_4MHZ	BIT(3)
+#define S1G_CAP0_SGI_8MHZ	BIT(4)
+#define S1G_CAP0_SGI_16MHZ	BIT(5)
+#define S1G_CAP0_SUPP_CH_WIDTH	GENMASK(7, 6)
+
+#define S1G_SUPP_CH_WIDTH_2	0
+#define S1G_SUPP_CH_WIDTH_4	1
+#define S1G_SUPP_CH_WIDTH_8	2
+#define S1G_SUPP_CH_WIDTH_16	3
+#define S1G_SUPP_CH_WIDTH_MAX(cap) ((1 << FIELD_GET(S1G_CAP0_SUPP_CH_WIDTH, \
+						    cap[0])) << 1)
+
+#define S1G_CAP1_RX_LDPC	BIT(0)
+#define S1G_CAP1_TX_STBC	BIT(1)
+#define S1G_CAP1_RX_STBC	BIT(2)
+#define S1G_CAP1_SU_BFER	BIT(3)
+#define S1G_CAP1_SU_BFEE	BIT(4)
+#define S1G_CAP1_BFEE_STS	GENMASK(7, 5)
+
+#define S1G_CAP2_SOUNDING_DIMENSIONS	GENMASK(2, 0)
+#define S1G_CAP2_MU_BFER		BIT(3)
+#define S1G_CAP2_MU_BFEE		BIT(4)
+#define S1G_CAP2_PLUS_HTC_VHT		BIT(5)
+#define S1G_CAP2_TRAVELING_PILOT	GENMASK(7, 6)
+
+#define S1G_CAP3_RD_RESPONDER		BIT(0)
+#define S1G_CAP3_HT_DELAYED_BA		BIT(1)
+#define S1G_CAP3_MAX_MPDU_LEN		BIT(2)
+#define S1G_CAP3_MAX_AMPDU_LEN_EXP	GENMASK(4, 3)
+#define S1G_CAP3_MIN_MPDU_START		GENMASK(7, 5)
+
+#define S1G_CAP4_UPLINK_SYNC	BIT(0)
+#define S1G_CAP4_DYNAMIC_AID	BIT(1)
+#define S1G_CAP4_BAT		BIT(2)
+#define S1G_CAP4_TIME_ADE	BIT(3)
+#define S1G_CAP4_NON_TIM	BIT(4)
+#define S1G_CAP4_GROUP_AID	BIT(5)
+#define S1G_CAP4_STA_TYPE	GENMASK(7, 6)
+
+#define S1G_CAP5_CENT_AUTH_CONTROL	BIT(0)
+#define S1G_CAP5_DIST_AUTH_CONTROL	BIT(1)
+#define S1G_CAP5_AMSDU			BIT(2)
+#define S1G_CAP5_AMPDU			BIT(3)
+#define S1G_CAP5_ASYMMETRIC_BA		BIT(4)
+#define S1G_CAP5_FLOW_CONTROL		BIT(5)
+#define S1G_CAP5_SECTORIZED_BEAM	GENMASK(7, 6)
+
+#define S1G_CAP6_OBSS_MITIGATION	BIT(0)
+#define S1G_CAP6_FRAGMENT_BA		BIT(1)
+#define S1G_CAP6_NDP_PS_POLL		BIT(2)
+#define S1G_CAP6_RAW_OPERATION		BIT(3)
+#define S1G_CAP6_PAGE_SLICING		BIT(4)
+#define S1G_CAP6_TXOP_SHARING_IMP_ACK	BIT(5)
+#define S1G_CAP6_VHT_LINK_ADAPT		GENMASK(7, 6)
+
+#define S1G_CAP7_TACK_AS_PS_POLL		BIT(0)
+#define S1G_CAP7_DUP_1MHZ			BIT(1)
+#define S1G_CAP7_MCS_NEGOTIATION		BIT(2)
+#define S1G_CAP7_1MHZ_CTL_RESPONSE_PREAMBLE	BIT(3)
+#define S1G_CAP7_NDP_BFING_REPORT_POLL		BIT(4)
+#define S1G_CAP7_UNSOLICITED_DYN_AID		BIT(5)
+#define S1G_CAP7_SECTOR_TRAINING_OPERATION	BIT(6)
+#define S1G_CAP7_TEMP_PS_MODE_SWITCH		BIT(7)
+
+#define S1G_CAP8_TWT_GROUPING	BIT(0)
+#define S1G_CAP8_BDT		BIT(1)
+#define S1G_CAP8_COLOR		GENMASK(4, 2)
+#define S1G_CAP8_TWT_REQUEST	BIT(5)
+#define S1G_CAP8_TWT_RESPOND	BIT(6)
+#define S1G_CAP8_PV1_FRAME	BIT(7)
+
+#define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0)
+
+#define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ	BIT(0)
+#define S1G_OPER_CH_WIDTH_OPER		GENMASK(4, 1)
 
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
-- 
cgit v1.2.3


From d65a977087f94f3bb97f351798d864556063109a Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Tue, 8 Sep 2020 12:03:03 -0700
Subject: nl80211: advertise supported channel width in S1G

S1G supports 5 channel widths: 1, 2, 4, 8, and 16. One
channel width is allowed per frequency in each operating
class, so it makes more sense to advertise the specific
channel width allowed.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200908190323.15814-3-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 15 +++++++++++++++
 include/uapi/linux/nl80211.h | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7ad530912b21..2a7561743717 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -96,6 +96,16 @@ struct wiphy;
  * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
  *	on this channel.
  * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel.
+ * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted
+ *	on this channel.
  *
  */
 enum ieee80211_channel_flags {
@@ -113,6 +123,11 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
 	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
 	IEEE80211_CHAN_NO_HE		= 1<<13,
+	IEEE80211_CHAN_1MHZ		= 1<<14,
+	IEEE80211_CHAN_2MHZ		= 1<<15,
+	IEEE80211_CHAN_4MHZ		= 1<<16,
+	IEEE80211_CHAN_8MHZ		= 1<<17,
+	IEEE80211_CHAN_16MHZ		= 1<<18,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0584e0d349f0..4e119c6afa31 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3737,6 +3737,16 @@ enum nl80211_wmm_rule {
  * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel
  *	in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz
+ * @NL80211_FREQUENCY_ATTR_1MHZ: 1 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_2MHZ: 2 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_4MHZ: 4 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_8MHZ: 8 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed
+ *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -3768,6 +3778,11 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_WMM,
 	NL80211_FREQUENCY_ATTR_NO_HE,
 	NL80211_FREQUENCY_ATTR_OFFSET,
+	NL80211_FREQUENCY_ATTR_1MHZ,
+	NL80211_FREQUENCY_ATTR_2MHZ,
+	NL80211_FREQUENCY_ATTR_4MHZ,
+	NL80211_FREQUENCY_ATTR_8MHZ,
+	NL80211_FREQUENCY_ATTR_16MHZ,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 11b34737b18a70c74d5cf13ee58d36e95879013c Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Tue, 8 Sep 2020 12:03:06 -0700
Subject: nl80211: support setting S1G channels

S1G channels have a single width defined per frequency, so
derive it from the channel flags with
ieee80211_s1g_channel_width().

Also support setting an S1G channel where control frequency may
differ from operating, and add some basic validation to
ensure the control channel is with the operating.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200908190323.15814-6-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2a7561743717..44db9f80e495 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5294,6 +5294,16 @@ ieee80211_channel_to_khz(const struct ieee80211_channel *chan)
 	return MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset;
 }
 
+/**
+ * ieee80211_s1g_channel_width - get allowed channel width from @chan
+ *
+ * Only allowed for band NL80211_BAND_S1GHZ
+ * @chan: channel
+ * Return: The allowed channel width for this center_freq
+ */
+enum nl80211_chan_width
+ieee80211_s1g_channel_width(const struct ieee80211_channel *chan);
+
 /**
  * ieee80211_channel_to_freq_khz - convert channel number to frequency
  * @chan: channel number
-- 
cgit v1.2.3


From 291c49ded2fda1fd0d7bd6056de99fe47d2332e6 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:05:29 +0000
Subject: nl80211: Add FILS discovery support

FILS discovery attribute, NL80211_ATTR_FILS_DISCOVERY, is nested which
supports following parameters as given in IEEE Std 802.11ai-2016,
Annex C.3 MIB detail:
(1) NL80211_FILS_DISCOVERY_ATTR_INT_MIN - Minimum packet interval
(2) NL80211_FILS_DISCOVERY_ATTR_INT_MAX - Maximum packet interval
(3) NL80211_FILS_DISCOVERY_ATTR_TMPL - Template data

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/20200805011838.28166-2-alokad@codeaurora.org
[fix attribute and other names, use NLA_RANGE(), use policy only once]
Link: https://lore.kernel.org/r/010101747a7b38a8-306f06b2-9061-4baf-81c1-054a42a18e22-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 19 +++++++++++++++++++
 include/uapi/linux/nl80211.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 44db9f80e495..c90700727945 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1082,6 +1082,23 @@ struct cfg80211_acl_data {
 	struct mac_address mac_addrs[];
 };
 
+/**
+ * struct cfg80211_fils_discovery - FILS discovery parameters from
+ * IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @min_interval: Minimum packet interval in TUs (0 - 10000)
+ * @max_interval: Maximum packet interval in TUs (0 - 10000)
+ * @tmpl_len: Template length
+ * @tmpl: Template data for FILS discovery frame including the action
+ *	frame headers.
+ */
+struct cfg80211_fils_discovery {
+	u32 min_interval;
+	u32 max_interval;
+	size_t tmpl_len;
+	const u8 *tmpl;
+};
+
 /**
  * enum cfg80211_ap_settings_flags - AP settings flags
  *
@@ -1129,6 +1146,7 @@ enum cfg80211_ap_settings_flags {
  * @he_obss_pd: OBSS Packet Detection settings
  * @he_bss_color: BSS Color settings
  * @he_oper: HE operation IE (or %NULL if HE isn't enabled)
+ * @fils_discovery: FILS discovery transmission parameters
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1159,6 +1177,7 @@ struct cfg80211_ap_settings {
 	u32 flags;
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
+	struct cfg80211_fils_discovery fils_discovery;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 4e119c6afa31..ad2bea3b07e3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2513,6 +2513,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from
  *	association request when used with NL80211_CMD_NEW_STATION).
  *
+ * @NL80211_ATTR_FILS_DISCOVERY: Optional parameter to configure FILS
+ *	discovery. It is a nested attribute, see
+ *	&enum nl80211_fils_discovery_attributes.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2995,6 +2999,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_HE_6GHZ_CAPABILITY,
 
+	NL80211_ATTR_FILS_DISCOVERY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5867,6 +5873,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication
  *	in AP mode (SAE password is passed as part of the start AP command).
  *
+ * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery
+ *	frames transmission
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5925,6 +5934,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION,
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
 	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
+	NL80211_EXT_FEATURE_FILS_DISCOVERY,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -7019,4 +7029,38 @@ enum nl80211_iftype_akm_attributes {
 	NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1,
 };
 
+/**
+ * enum nl80211_fils_discovery_attributes - FILS discovery configuration
+ * from IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @__NL80211_FILS_DISCOVERY_ATTR_INVALID: Invalid
+ *
+ * @NL80211_FILS_DISCOVERY_ATTR_INT_MIN: Minimum packet interval (u32, TU).
+ *	Allowed range: 0..10000 (TU = Time Unit)
+ * @NL80211_FILS_DISCOVERY_ATTR_INT_MAX: Maximum packet interval (u32, TU).
+ *	Allowed range: 0..10000 (TU = Time Unit)
+ * @NL80211_FILS_DISCOVERY_ATTR_TMPL: Template data for FILS discovery action
+ *	frame including the headers.
+ *
+ * @__NL80211_FILS_DISCOVERY_ATTR_LAST: Internal
+ * @NL80211_FILS_DISCOVERY_ATTR_MAX: highest attribute
+ */
+enum nl80211_fils_discovery_attributes {
+	__NL80211_FILS_DISCOVERY_ATTR_INVALID,
+
+	NL80211_FILS_DISCOVERY_ATTR_INT_MIN,
+	NL80211_FILS_DISCOVERY_ATTR_INT_MAX,
+	NL80211_FILS_DISCOVERY_ATTR_TMPL,
+
+	/* keep last */
+	__NL80211_FILS_DISCOVERY_ATTR_LAST,
+	NL80211_FILS_DISCOVERY_ATTR_MAX = __NL80211_FILS_DISCOVERY_ATTR_LAST - 1
+};
+
+/*
+ * FILS discovery template minimum length with action frame headers and
+ * mandatory fields.
+ */
+#define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42
+
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From 295b02c4be74bebf988593b8322369513fcecf68 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:05:31 +0000
Subject: mac80211: Add FILS discovery support

This patch adds mac80211 support to configure FILS discovery
transmission.
Changes include functions to store and retrieve FILS discovery
template, minimum and maximum packet intervals.

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/20200805011838.28166-3-alokad@codeaurora.org
[remove SUPPORTS_FILS_DISCOVERY, driver can just set wiphy info]
Link: https://lore.kernel.org/r/010101747a7b3cbb-6edaa89c-436d-4391-8765-61456d7f5f4e-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d3c43420779c..9381f00d0942 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -317,6 +317,7 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_TWT: TWT status changed
  * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed.
  * @BSS_CHANGED_HE_BSS_COLOR: BSS Color has changed
+ * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
  *
  */
 enum ieee80211_bss_change {
@@ -350,6 +351,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_TWT			= 1<<27,
 	BSS_CHANGED_HE_OBSS_PD		= 1<<28,
 	BSS_CHANGED_HE_BSS_COLOR	= 1<<29,
+	BSS_CHANGED_FILS_DISCOVERY      = 1<<30,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -490,6 +492,18 @@ struct ieee80211_ftm_responder_params {
 	size_t civicloc_len;
 };
 
+/**
+ * struct ieee80211_fils_discovery - FILS discovery parameters from
+ * IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @min_interval: Minimum packet interval in TUs (0 - 10000)
+ * @max_interval: Maximum packet interval in TUs (0 - 10000)
+ */
+struct ieee80211_fils_discovery {
+	u32 min_interval;
+	u32 max_interval;
+};
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -607,6 +621,7 @@ struct ieee80211_ftm_responder_params {
  * @he_oper: HE operation information of the AP we are connected to
  * @he_obss_pd: OBSS Packet Detection parameters.
  * @he_bss_color: BSS coloring settings, if BSS supports HE
+ * @fils_discovery: FILS discovery configuration
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -674,6 +689,7 @@ struct ieee80211_bss_conf {
 	} he_oper;
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
+	struct ieee80211_fils_discovery fils_discovery;
 };
 
 /**
@@ -6629,4 +6645,15 @@ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
  */
 bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable);
 
+/**
+ * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template.
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: FILS discovery template. %NULL on error.
+ */
+struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
+						  struct ieee80211_vif *vif);
 #endif /* MAC80211_H */
-- 
cgit v1.2.3


From 7443dcd1f1718a355e9c4ebeb7e95c3f9f27bb5f Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:33:00 +0000
Subject: nl80211: Unsolicited broadcast probe response support

This patch adds new attributes to support unsolicited broadcast
probe response transmission used for in-band
discovery in 6GHz band (IEEE P802.11ax/D6.0 26.17.2.3.2, AP behavior for
fast passive scanning).
The new attribute, NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, is nested which
supports following parameters:
(1) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT - Packet interval
(2) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL - Template data

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/010101747a946698-aac263ae-2ed3-4dab-9590-0bc7131214e1-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 18 ++++++++++++++++++
 include/uapi/linux/nl80211.h | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c90700727945..93d666a571da 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1099,6 +1099,22 @@ struct cfg80211_fils_discovery {
 	const u8 *tmpl;
 };
 
+/**
+ * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe
+ *	response parameters in 6GHz.
+ *
+ * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned
+ *	in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive
+ *	scanning
+ * @tmpl_len: Template length
+ * @tmpl: Template data for probe response
+ */
+struct cfg80211_unsol_bcast_probe_resp {
+	u32 interval;
+	size_t tmpl_len;
+	const u8 *tmpl;
+};
+
 /**
  * enum cfg80211_ap_settings_flags - AP settings flags
  *
@@ -1147,6 +1163,7 @@ enum cfg80211_ap_settings_flags {
  * @he_bss_color: BSS Color settings
  * @he_oper: HE operation IE (or %NULL if HE isn't enabled)
  * @fils_discovery: FILS discovery transmission parameters
+ * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1178,6 +1195,7 @@ struct cfg80211_ap_settings {
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
 	struct cfg80211_fils_discovery fils_discovery;
+	struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ad2bea3b07e3..bdc90b8dfd24 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2517,6 +2517,10 @@ enum nl80211_commands {
  *	discovery. It is a nested attribute, see
  *	&enum nl80211_fils_discovery_attributes.
  *
+ * @NL80211_ATTR_UNSOL_BCAST_PROBE_RESP: Optional parameter to configure
+ *	unsolicited broadcast probe response. It is a nested attribute, see
+ *	&enum nl80211_unsol_bcast_probe_resp_attributes.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3001,6 +3005,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FILS_DISCOVERY,
 
+	NL80211_ATTR_UNSOL_BCAST_PROBE_RESP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5876,6 +5882,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery
  *	frames transmission
  *
+ * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports
+ *	unsolicited broadcast probe response transmission
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5935,6 +5944,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
 	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
 	NL80211_EXT_FEATURE_FILS_DISCOVERY,
+	NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -7063,4 +7073,30 @@ enum nl80211_fils_discovery_attributes {
  */
 #define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42
 
+/**
+ * enum nl80211_unsol_bcast_probe_resp_attributes - Unsolicited broadcast probe
+ *	response configuration. Applicable only in 6GHz.
+ *
+ * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID: Invalid
+ *
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT: Maximum packet interval (u32, TU).
+ *	Allowed range: 0..20 (TU = Time Unit). IEEE P802.11ax/D6.0
+ *	26.17.2.3.2 (AP behavior for fast passive scanning).
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL: Unsolicited broadcast probe response
+ *	frame template (binary).
+ *
+ * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST: Internal
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX: highest attribute
+ */
+enum nl80211_unsol_bcast_probe_resp_attributes {
+	__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID,
+
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT,
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL,
+
+	/* keep last */
+	__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST,
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX =
+		__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1
+};
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From 632189a0180fdaae6715c83c68cc5c8998d6c841 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:33:01 +0000
Subject: mac80211: Unsolicited broadcast probe response support

This patch adds mac80211 support to configure unsolicited
broadcast probe response transmission for in-band discovery in 6GHz.

Changes include functions to store and retrieve probe response template,
and packet interval (0 - 20 TUs).
Setting interval to 0 disables the unsolicited broadcast probe response
transmission.

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/010101747a946b35-ad25858a-1f1f-48df-909e-dc7bf26d9169-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9381f00d0942..f0908b567d65 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -318,6 +318,8 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed.
  * @BSS_CHANGED_HE_BSS_COLOR: BSS Color has changed
  * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
+ * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
+ *	status changed.
  *
  */
 enum ieee80211_bss_change {
@@ -352,6 +354,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_HE_OBSS_PD		= 1<<28,
 	BSS_CHANGED_HE_BSS_COLOR	= 1<<29,
 	BSS_CHANGED_FILS_DISCOVERY      = 1<<30,
+	BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -622,6 +625,8 @@ struct ieee80211_fils_discovery {
  * @he_obss_pd: OBSS Packet Detection parameters.
  * @he_bss_color: BSS coloring settings, if BSS supports HE
  * @fils_discovery: FILS discovery configuration
+ * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response
+ *	interval.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -690,6 +695,7 @@ struct ieee80211_bss_conf {
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
 	struct ieee80211_fils_discovery fils_discovery;
+	u32 unsol_bcast_probe_resp_interval;
 };
 
 /**
@@ -6656,4 +6662,18 @@ bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable);
  */
 struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
 						  struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast
+ *	probe response template.
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: Unsolicited broadcast probe response template. %NULL on error.
+ */
+struct sk_buff *
+ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
+					  struct ieee80211_vif *vif);
 #endif /* MAC80211_H */
-- 
cgit v1.2.3


From 9ff167e178224069221a5771c12dfea9737bf3a3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Sep 2020 13:19:22 +0200
Subject: cfg80211: add missing kernel-doc for S1G band capabilities

Add missing kernel-doc for the S1G band capabilities in the
per band data.

Link: https://lore.kernel.org/r/20200918131921.08c893cd73a1.Id71583c37baca8a9a3329426e02b66d9ab65ac03@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 93d666a571da..10c2cc8f0efc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -465,6 +465,7 @@ struct ieee80211_sta_s1g_cap {
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
  * @edmg_cap: EDMG capabilities in this band
+ * @s1g_cap: S1G capabilities in this band (S1B band only, of course)
  * @n_iftype_data: number of iftype data entries
  * @iftype_data: interface type data entries.  Note that the bits in
  *	@types_mask inside this structure cannot overlap (i.e. only
-- 
cgit v1.2.3


From 7fba53ebb5b2d89d95b697f4c42c73c6fb7ba0c6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Sep 2020 13:21:16 +0200
Subject: mac80211: fix some encapsulation offload kernel-doc

Add a missing kernel-doc entry for the offload_flags, and
correct the name of the update_vif_offload method.

Link: https://lore.kernel.org/r/20200918132115.d46a0915ba8a.Ibba536d04a5a5fb655f8ef6e51b247457bfda4ca@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f0908b567d65..e90089d104b0 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1690,6 +1690,8 @@ enum ieee80211_offload_flags {
  * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
  * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
  *	protected by fq->lock.
+ * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
+ *	&enum ieee80211_offload_flags.
  */
 struct ieee80211_vif {
 	enum nl80211_iftype type;
@@ -3864,7 +3866,7 @@ enum ieee80211_reconfig_type {
  * @set_tid_config: Apply TID specific configurations. This callback may sleep.
  * @reset_tid_config: Reset TID specific configuration for the peer.
  *	This callback may sleep.
- * @update_vif_config: Update virtual interface offload flags
+ * @update_vif_offload: Update virtual interface offload flags
  *	This callback may sleep.
  * @sta_set_4addr: Called to notify the driver when a station starts/stops using
  *	4-address mode
-- 
cgit v1.2.3


From f92970c694b36a4dbac2b650b173c78c0f0954cc Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 17 Sep 2020 18:13:23 -0700
Subject: devlink: add timeout information to status_notify

Add a timeout element to the DEVLINK_CMD_FLASH_UPDATE_STATUS
netlink message for use by a userland utility to show that
a particular firmware flash activity may take a long but
bounded time to finish.  Also add a handy helper for drivers
to make use of the new timeout value.

UI usage hints:
 - if non-zero, add timeout display to the end of the status line
 	[component] status_msg  ( Xm Ys : Am Bs )
     using the timeout value for Am Bs and updating the Xm Ys
     every second
 - if the timeout expires while awaiting the next update,
   display something like
 	[component] status_msg  ( timeout reached : Am Bs )
 - if new status notify messages are received, remove
   the timeout and start over

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 4 ++++
 include/uapi/linux/devlink.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 48b1c1ef1ebd..be132c17fbcc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1403,6 +1403,10 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
 					const char *component,
 					unsigned long done,
 					unsigned long total);
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+					 const char *status_msg,
+					 const char *component,
+					 unsigned long timeout);
 
 int devlink_traps_register(struct devlink *devlink,
 			   const struct devlink_trap *traps,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 631f5bdf1707..a2ecc8b00611 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -462,6 +462,9 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
 	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From 6700acc5f1fe97b5705832f2678cba9e9756a0dc Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 17 Sep 2020 18:13:24 -0700
Subject: devlink: collect flash notify params into a struct

The dev flash status notify function parameter lists are getting
rather long, so add a struct to be filled and passed rather than
continuously changing the function signatures.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index be132c17fbcc..73065f07bf17 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -391,6 +391,25 @@ struct devlink_param_gset_ctx {
 	enum devlink_param_cmode cmode;
 };
 
+/**
+ * struct devlink_flash_notify - devlink dev flash notify data
+ * @status_msg: current status string
+ * @component: firmware component being updated
+ * @done: amount of work completed of total amount
+ * @total: amount of work expected to be done
+ * @timeout: expected max timeout in seconds
+ *
+ * These are values to be given to userland to be displayed in order
+ * to show current activity in a firmware update process.
+ */
+struct devlink_flash_notify {
+	const char *status_msg;
+	const char *component;
+	unsigned long done;
+	unsigned long total;
+	unsigned long timeout;
+};
+
 /**
  * struct devlink_param - devlink configuration parameter data
  * @name: name of the parameter
-- 
cgit v1.2.3


From daef1ee3798b25e8464b8eb618eaa74b8f423ac7 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Fri, 18 Sep 2020 08:17:27 +0700
Subject: tipc: introduce encryption master key

In addition to the supported cluster & per-node encryption keys for the
en/decryption of TIPC messages, we now introduce one option for user to
set a cluster key as 'master key', which is simply a symmetric key like
the former but has a longer life cycle. It has two purposes:

- Authentication of new member nodes in the cluster. New nodes, having
  no knowledge of current session keys in the cluster will still be
  able to join the cluster as long as they know the master key. This is
  because all neighbor discovery (LINK_CONFIG) messages must be
  encrypted with this key.

- Encryption of session encryption keys during automatic exchange and
  update of those.This is a feature we will introduce in a later commit
  in this series.

We insert the new key into the currently unused slot 0 in the key array
and start using it immediately once the user has set it.
After joining, a node only knowing the master key should be fully
communicable to existing nodes in the cluster, although those nodes may
have their own session keys activated (i.e. not the master one). To
support this, we define a 'grace period', starting from the time a node
itself reports having no RX keys, so the existing nodes will use the
master key for encryption instead. The grace period can be extended but
will automatically stop after e.g. 5 seconds without a new report. This
is also the basis for later key exchanging feature as the new node will
be impossible to decrypt anything without the support from master key.

For user to set a master key, we define a new netlink flag -
'TIPC_NLA_NODE_KEY_MASTER', so it can be added to the current 'set key'
netlink command to specify the setting key to be a master key.

Above all, the traditional cluster/per-node key mechanism is guaranteed
to work when user comes not to use this master key option. This is also
compatible to legacy nodes without the feature supported.

Even this master key can be updated without any interruption of cluster
connectivity but is so is needed, this has to be coordinated and set by
the user.

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index dc0d23a50e69..d484baa9d365 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -165,6 +165,7 @@ enum {
 	TIPC_NLA_NODE_UP,		/* flag */
 	TIPC_NLA_NODE_ID,		/* data */
 	TIPC_NLA_NODE_KEY,		/* data */
+	TIPC_NLA_NODE_KEY_MASTER,	/* flag */
 
 	__TIPC_NLA_NODE_MAX,
 	TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
-- 
cgit v1.2.3


From 23700da29b83e859a8c3727fddd33ba74c4f3a39 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Fri, 18 Sep 2020 08:17:29 +0700
Subject: tipc: add automatic rekeying for encryption key

Rekeying is required for security since a key is less secure when using
for a long time. Also, key will be detached when its nonce value (or
seqno ...) is exhausted. We now make the rekeying process automatic and
configurable by user.

Basically, TIPC will at a specific interval generate a new key by using
the kernel 'Random Number Generator' cipher, then attach it as the node
TX key and securely distribute to others in the cluster as RX keys (-
the key exchange). The automatic key switching will then take over, and
make the new key active shortly. Afterwards, the traffic from this node
will be encrypted with the new session key. The same can happen in peer
nodes but not necessarily at the same time.

For simplicity, the automatically generated key will be initiated as a
per node key. It is not too hard to also support a cluster key rekeying
(e.g. a given node will generate a unique cluster key and update to the
others in the cluster...), but that doesn't bring much benefit, while a
per-node key is even more secure.

We also enable user to force a rekeying or change the rekeying interval
via netlink, the new 'set key' command option: 'TIPC_NLA_NODE_REKEYING'
is added for these purposes as follows:
- A value >= 1 will be set as the rekeying interval (in minutes);
- A value of 0 will disable the rekeying;
- A value of 'TIPC_REKEYING_NOW' (~0) will force an immediate rekeying;

The default rekeying interval is (60 * 24) minutes i.e. done every day.
There isn't any restriction for the value but user shouldn't set it too
small or too large which results in an "ineffective" rekeying (thats ok
for testing though).

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h         | 2 ++
 include/uapi/linux/tipc_netlink.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index add01db1daef..80ea15e12113 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -254,6 +254,8 @@ static inline int tipc_aead_key_size(struct tipc_aead_key *key)
 	return sizeof(*key) + key->keylen;
 }
 
+#define TIPC_REKEYING_NOW		(~0U)
+
 /* The macros and functions below are deprecated:
  */
 
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d484baa9d365..d847dd671d79 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -166,6 +166,7 @@ enum {
 	TIPC_NLA_NODE_ID,		/* data */
 	TIPC_NLA_NODE_KEY,		/* data */
 	TIPC_NLA_NODE_KEY_MASTER,	/* flag */
+	TIPC_NLA_NODE_REKEYING,		/* u32 */
 
 	__TIPC_NLA_NODE_MAX,
 	TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
-- 
cgit v1.2.3


From 3753d9779038ab011e01b949253492aaa37bf57a Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Thu, 17 Sep 2020 22:08:32 -0700
Subject: net: fix build without CONFIG_SYSCTL definition

Earlier commit 316cdaa1158a ("net: add option to not create fall-back
tunnels in root-ns as well") removed the CONFIG_SYSCTL to enable the
kernel-commandline to work. However, this variable gets defined only
when CONFIG_SYSCTL option is selected.

With this change the behavior would default to creating fall-back
tunnels in all namespaces when CONFIG_SYSCTL is not selected and
the kernel commandline option will be ignored.

Fixes: 316cdaa1158a ("net: add option to not create fall-back tunnels in root-ns as well")
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 909b1fbb0481..fef0eb96cf69 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -634,8 +634,9 @@ extern int sysctl_devconf_inherit_init_net;
  */
 static inline bool net_has_fallback_tunnels(const struct net *net)
 {
-	return (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1) ||
-	       !sysctl_fb_tunnels_only_for_init_net;
+	return !IS_ENABLED(CONFIG_SYSCTL) ||
+	       !sysctl_fb_tunnels_only_for_init_net ||
+	       (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
 }
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
-- 
cgit v1.2.3


From 6d23d831e9bd0b1d2bcd9a1ecdc6ac8e6d162c36 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Fri, 18 Sep 2020 17:48:01 +0800
Subject: ptp_qoriq: support FIPER3

The FIPER3 (fixed interval period pulse generator) is supported on
DPAA2 and ENETC network controller hardware. This patch is to support
it in ptp_qoriq driver.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fsl/ptp_qoriq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 884b8f8ca06d..01acebe37fab 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -136,6 +136,7 @@ struct ptp_qoriq_registers {
 #define DEFAULT_TMR_PRSC	2
 #define DEFAULT_FIPER1_PERIOD	1000000000
 #define DEFAULT_FIPER2_PERIOD	1000000000
+#define DEFAULT_FIPER3_PERIOD	1000000000
 
 struct ptp_qoriq {
 	void __iomem *base;
@@ -147,6 +148,7 @@ struct ptp_qoriq {
 	struct dentry *debugfs_root;
 	struct device *dev;
 	bool extts_fifo_support;
+	bool fiper3_support;
 	int irq;
 	int phc_index;
 	u32 tclk_period;  /* nanoseconds */
@@ -155,6 +157,7 @@ struct ptp_qoriq {
 	u32 cksel;
 	u32 tmr_fiper1;
 	u32 tmr_fiper2;
+	u32 tmr_fiper3;
 	u32 (*read)(unsigned __iomem *addr);
 	void (*write)(unsigned __iomem *addr, u32 val);
 };
-- 
cgit v1.2.3


From 881321b6ed9e983ad733268404245fcf0be0b23c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 18 Sep 2020 13:57:51 +0300
Subject: net: mscc: ocelot: make ocelot_init_timestamp take a const struct
 ptp_clock_info

It is a good measure to ensure correctness if the structures that are
meant to remain constant are only processed by functions that thake
constant arguments.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot_ptp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index 4a6b2f71b6b2..6a7388fa7cc5 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -53,6 +53,7 @@ int ocelot_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
 		      enum ptp_pin_function func, unsigned int chan);
 int ocelot_ptp_enable(struct ptp_clock_info *ptp,
 		      struct ptp_clock_request *rq, int on);
-int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info);
+int ocelot_init_timestamp(struct ocelot *ocelot,
+			  const struct ptp_clock_info *info);
 int ocelot_deinit_timestamp(struct ocelot *ocelot);
 #endif
-- 
cgit v1.2.3


From e14e05e71d106aef973e2cf100e540d911703a6e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 18 Sep 2020 21:11:01 +0200
Subject: net: devlink: regions: Add a priv member to the regions ops struct

The driver may have multiple regions which can be dumped using one
function. However, for this to work, additional information is
needed. Add a priv member to the ops structure for the driver to use
however it likes.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 73065f07bf17..b68e483d9267 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -561,12 +561,14 @@ struct devlink_info_req;
  *            the data variable must be updated to point to the snapshot data.
  *            The function will be called while the devlink instance lock is
  *            held.
+ * @priv: Pointer to driver private data for the region operation
  */
 struct devlink_region_ops {
 	const char *name;
 	void (*destructor)(const void *data);
 	int (*snapshot)(struct devlink *devlink, struct netlink_ext_ack *extack,
 			u8 **data);
+	void *priv;
 };
 
 struct devlink_fmsg;
-- 
cgit v1.2.3


From d4602a9f47196dd62deba66ec361b5897f1ae62b Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 18 Sep 2020 21:11:02 +0200
Subject: net: devlink: region: Pass the region ops to the snapshot function

Pass the region to be snapshotted to the function performing the
snapshot. This allows one function to operate on numerous regions.

v4:
Add missing kerneldoc for ICE

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b68e483d9267..4883dbae7faf 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -566,7 +566,9 @@ struct devlink_info_req;
 struct devlink_region_ops {
 	const char *name;
 	void (*destructor)(const void *data);
-	int (*snapshot)(struct devlink *devlink, struct netlink_ext_ack *extack,
+	int (*snapshot)(struct devlink *devlink,
+			const struct devlink_region_ops *ops,
+			struct netlink_ext_ack *extack,
 			u8 **data);
 	void *priv;
 };
-- 
cgit v1.2.3


From ccc3e6b0191c58784c4b4ffc735f81970df33a11 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 18 Sep 2020 21:11:03 +0200
Subject: net: dsa: Add helper to convert from devlink to ds

Given a devlink instance, return the dsa switch it is associated to.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 75c8fac82017..42ae6d4d9d43 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -664,6 +664,13 @@ struct dsa_devlink_priv {
 	struct dsa_switch *ds;
 };
 
+static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl)
+{
+	struct dsa_devlink_priv *dl_priv = devlink_priv(dl);
+
+	return dl_priv->ds;
+}
+
 struct dsa_switch_driver {
 	struct list_head	list;
 	const struct dsa_switch_ops *ops;
-- 
cgit v1.2.3


From 97c82c23135187878acea76bc5f0b03007e17ac7 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 18 Sep 2020 21:11:04 +0200
Subject: net: dsa: Add devlink regions support to DSA

Allow DSA drivers to make use of devlink regions, via simple wrappers.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 42ae6d4d9d43..431efb5098be 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -658,6 +658,12 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
 					   void *occ_get_priv);
 void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
 					     u64 resource_id);
+struct devlink_region *
+dsa_devlink_region_create(struct dsa_switch *ds,
+			  const struct devlink_region_ops *ops,
+			  u32 region_max_snapshots, u64 region_size);
+void dsa_devlink_region_destroy(struct devlink_region *region);
+
 struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
 
 struct dsa_devlink_priv {
-- 
cgit v1.2.3


From 0f06b855a93c3b449253b91abc94c4d483af0a44 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 18 Sep 2020 21:11:08 +0200
Subject: net: dsa: wire up devlink info get

Allow the DSA drivers to implement the devlink call to get info info,
e.g. driver name, firmware version, ASIC ID, etc.

v2:
Combine declaration and the assignment on a single line.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 431efb5098be..d16057c5987a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -612,11 +612,14 @@ struct dsa_switch_ops {
 	bool	(*port_rxtstamp)(struct dsa_switch *ds, int port,
 				 struct sk_buff *skb, unsigned int type);
 
-	/* Devlink parameters */
+	/* Devlink parameters, etc */
 	int	(*devlink_param_get)(struct dsa_switch *ds, u32 id,
 				     struct devlink_param_gset_ctx *ctx);
 	int	(*devlink_param_set)(struct dsa_switch *ds, u32 id,
 				     struct devlink_param_gset_ctx *ctx);
+	int	(*devlink_info_get)(struct dsa_switch *ds,
+				    struct devlink_info_req *req,
+				    struct netlink_ext_ack *extack);
 
 	/*
 	 * MTU change functionality. Switches can also adjust their MRU through
-- 
cgit v1.2.3


From 55f13311785cebd60b9bab9ca7fd64205436c462 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Fri, 18 Sep 2020 14:14:51 -0500
Subject: ethtool: Add 100base-FX link mode entries

Add entries for the 100base-FX full and half duplex supported modes.

$ ethtool eth0
        Supported ports: [ FIBRE ]
        Supported link modes:  100baseFX/Half 100baseFX/Full
        Supported pause frame use: Symmetric Receive-only
        Supports auto-negotiation: No
        Supported FEC modes: Not reported
        Advertised link modes: 100baseFX/Half 100baseFX/Full
        Advertised pause frame use: No
        Advertised auto-negotiation: No
        Advertised FEC modes: Not reported
        Speed: 100Mb/s
        Duplex: Full
        Auto-negotiation: off
        Port: MII
        PHYAD: 1
        Transceiver: external
        Supports Wake-on: gs
        Wake-on: d
        SecureOn password: 00:00:00:00:00:00
        Current message level: 0x00000000 (0)

        Link detected: yes

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b4f2d134e713..9ca87bc73c44 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1617,6 +1617,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87,
 	ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT	 = 88,
 	ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT	 = 89,
+	ETHTOOL_LINK_MODE_100baseFX_Half_BIT		 = 90,
+	ETHTOOL_LINK_MODE_100baseFX_Full_BIT		 = 91,
 	/* must be last entry */
 	__ETHTOOL_LINK_MODE_MASK_NBITS
 };
-- 
cgit v1.2.3


From bbed0bbdddaf46260aeb1a8910a3b32941e321a2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Sep 2020 03:10:30 +0300
Subject: net: dsa: tag_8021q: add VLANs to the master interface too

The whole purpose of tag_8021q is to send VLAN-tagged traffic to the
CPU, from which the driver can decode the source port and switch id.

Currently this only works if the VLAN filtering on the master is
disabled. Change that by explicitly adding code to tag_8021q.c to add
the VLANs corresponding to the tags to the filter of the master
interface.

Because we now need to call vlan_vid_add, then we also need to hold the
RTNL mutex. Propagate that requirement to the callers of dsa_8021q_setup
and modify the existing call sites as appropriate. Note that one call
path, sja1105_best_effort_vlan_filtering_set -> sja1105_vlan_filtering
-> sja1105_setup_8021q_tagging, was already holding this lock.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 2b003ae9fb38..88cd72dfa4e0 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -31,6 +31,8 @@ struct dsa_8021q_context {
 	const struct dsa_8021q_ops *ops;
 	struct dsa_switch *ds;
 	struct list_head crosschip_links;
+	/* EtherType of RX VID, used for filtering on master interface */
+	__be16 proto;
 };
 
 #define DSA_8021Q_N_SUBVLAN			8
-- 
cgit v1.2.3


From 49347755a84052fd1317c3897c7cea954c8f89fe Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 16 Sep 2020 00:34:53 +0200
Subject: can: include: fix spelling mistakes

This patch fixes spelling erros found by "codespell" in the
include/linux/can subtree.

Link: https://lore.kernel.org/r/20200915223527.1417033-4-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h | 2 +-
 include/linux/can/dev.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index e20a0cd09ba5..7da9f1f82e8e 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -2,7 +2,7 @@
 /*
  * linux/can/core.h
  *
- * Protoypes and definitions for CAN protocol modules using the PF_CAN core
+ * Prototypes and definitions for CAN protocol modules using the PF_CAN core
  *
  * Authors: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
  *          Urs Thuermann   <urs.thuermann@volkswagen.de>
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 5e3d45525bd3..516892566ac9 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -108,7 +108,7 @@ static inline bool can_skb_headroom_valid(struct net_device *dev,
 
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		/* preform proper loopback on capable devices */
+		/* perform proper loopback on capable devices */
 		if (dev->flags & IFF_ECHO)
 			skb->pkt_type = PACKET_LOOPBACK;
 		else
-- 
cgit v1.2.3


From 80a71815d8cd1be6481ad16fad3167f095045a06 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 16 Sep 2020 00:35:01 +0200
Subject: can: dev: can_put_echo_skb(): propagate error in case of errors

The function can_put_echo_skb() can fail for several reasons. It may
fail due to OOM, but when it fails it's usually due to locking problems
in the driver.

In order to help developing and debugging of new drivers propagate error
value in case of errors.

Link: https://lore.kernel.org/r/20200915223527.1417033-12-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 516892566ac9..ed0482b2f4b2 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -201,8 +201,8 @@ void can_bus_off(struct net_device *dev);
 void can_change_state(struct net_device *dev, struct can_frame *cf,
 		      enum can_state tx_state, enum can_state rx_state);
 
-void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
-		      unsigned int idx);
+int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
+		     unsigned int idx);
 struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
 				   u8 *len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
-- 
cgit v1.2.3


From 728fc9ff73d3f25220f6b8a52aaf063ec51ef294 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 16 Sep 2020 00:35:22 +0200
Subject: can: rx-offload: can_rx_offload_add_manual(): add new initialization
 function

This patch adds a new initialization function:
can_rx_offload_add_manual()

It should be used to add support rx-offload to a driver, if the callback
mechanism should not be used. Use e.g. can_rx_offload_queue_sorted() to queue
skbs into rx-offload.

Link: https://lore.kernel.org/r/20200915223527.1417033-33-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/rx-offload.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 1b78a0cfb615..f1b38088b765 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -35,6 +35,9 @@ int can_rx_offload_add_timestamp(struct net_device *dev,
 int can_rx_offload_add_fifo(struct net_device *dev,
 			    struct can_rx_offload *offload,
 			    unsigned int weight);
+int can_rx_offload_add_manual(struct net_device *dev,
+			      struct can_rx_offload *offload,
+			      unsigned int weight);
 int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
 					 u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From 2af30f115d6957f372ce3096c7198763ff253d97 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 21 Sep 2020 13:12:17 +0100
Subject: btf: Make btf_set_contains take a const pointer

bsearch doesn't modify the contents of the array, so we can take a const pointer.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200921121227.255763-2-lmb@cloudflare.com
---
 include/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d7c5a6ed87e3..0478b20d335b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1905,6 +1905,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		       void *addr1, void *addr2);
 
 struct btf_id_set;
-bool btf_id_set_contains(struct btf_id_set *set, u32 id);
+bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
 #endif /* _LINUX_BPF_H */
-- 
cgit v1.2.3


From 27774b7073b5d520c80f1fcb8e9993fc139f21bd Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 21 Sep 2020 13:12:19 +0100
Subject: btf: Add BTF_ID_LIST_SINGLE macro

Add a convenience macro that allows defining a BTF ID list with
a single item. This lets us cut down on repetitive macros.

Suggested-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200921121227.255763-4-lmb@cloudflare.com
---
 include/linux/btf_ids.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 210b086188a3..57890b357f85 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -76,6 +76,13 @@ extern u32 name[];
 #define BTF_ID_LIST_GLOBAL(name)			\
 __BTF_ID_LIST(name, globl)
 
+/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
+ * a single entry.
+ */
+#define BTF_ID_LIST_SINGLE(name, prefix, typename)	\
+	BTF_ID_LIST(name) \
+	BTF_ID(prefix, typename)
+
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
  * It's used when we want to define 'unused' entry
@@ -140,6 +147,7 @@ extern struct btf_id_set name;
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
 #define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
 #define BTF_SET_START(name) static struct btf_id_set name = { 0 };
 #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
 #define BTF_SET_END(name)
-- 
cgit v1.2.3


From 9436ef6e862b9ca22e5b12f87b106e07d5af4cae Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 21 Sep 2020 13:12:20 +0100
Subject: bpf: Allow specifying a BTF ID per argument in function protos

Function prototypes using ARG_PTR_TO_BTF_ID currently use two ways to signal
which BTF IDs are acceptable. First, bpf_func_proto.btf_id is an array of
IDs, one for each argument. This array is only accessed up to the highest
numbered argument that uses ARG_PTR_TO_BTF_ID and may therefore be less than
five arguments long. It usually points at a BTF_ID_LIST. Second, check_btf_id
is a function pointer that is called by the verifier if present. It gets the
actual BTF ID of the register, and the argument number we're currently checking.
It turns out that the only user check_arg_btf_id ignores the argument, and is
simply used to check whether the BTF ID has a struct sock_common at it's start.

Replace both of these mechanisms with an explicit BTF ID for each argument
in a function proto. Thanks to btf_struct_ids_match this is very flexible:
check_arg_btf_id can be replaced by requiring struct sock_common.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200921121227.255763-5-lmb@cloudflare.com
---
 include/linux/bpf.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0478b20d335b..87b0d5dcc1ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -326,12 +326,16 @@ struct bpf_func_proto {
 		};
 		enum bpf_arg_type arg_type[5];
 	};
-	int *btf_id; /* BTF ids of arguments */
-	bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is
-						    * valid. Often used if more
-						    * than one btf id is permitted
-						    * for this argument.
-						    */
+	union {
+		struct {
+			u32 *arg1_btf_id;
+			u32 *arg2_btf_id;
+			u32 *arg3_btf_id;
+			u32 *arg4_btf_id;
+			u32 *arg5_btf_id;
+		};
+		u32 *arg_btf_id[5];
+	};
 	int *ret_btf_id; /* return value btf_id */
 	bool (*allowed)(const struct bpf_prog *prog);
 };
@@ -1385,8 +1389,6 @@ int btf_struct_access(struct bpf_verifier_log *log,
 		      u32 *next_btf_id);
 bool btf_struct_ids_match(struct bpf_verifier_log *log,
 			  int off, u32 id, u32 need_type_id);
-int btf_resolve_helper_id(struct bpf_verifier_log *log,
-			  const struct bpf_func_proto *fn, int);
 
 int btf_distill_func_proto(struct bpf_verifier_log *log,
 			   struct btf *btf,
-- 
cgit v1.2.3


From f79e7ea571732a6e16f15c6e2f000c347e2d7431 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 21 Sep 2020 13:12:27 +0100
Subject: bpf: Use a table to drive helper arg type checks

The mapping between bpf_arg_type and bpf_reg_type is encoded in a big
hairy if statement that is hard to follow. The debug output also leaves
to be desired: if a reg_type doesn't match we only print one of the
options, instead printing all the valid ones.

Convert the if statement into a table which is then used to drive type
checking. If none of the reg_types match we print all options, e.g.:

    R2 type=rdonly_buf expected=fp, pkt, pkt_meta, map_value

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200921121227.255763-12-lmb@cloudflare.com
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 87b0d5dcc1ff..fc5c901c7542 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -292,6 +292,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
+	__BPF_ARG_TYPE_MAX,
 };
 
 /* type of values returned from helper functions */
-- 
cgit v1.2.3


From 8a8b9047a8975ad4bdee34b7affad66edfbe626d Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 16 Sep 2020 22:16:56 +0800
Subject: netfilter: nf_tables: Remove ununsed function nft_data_debug

It is never used, so can be removed.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8ceca0e419b3..c4c526507ddb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -148,13 +148,6 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
 	memcpy(dst, src, len);
 }
 
-static inline void nft_data_debug(const struct nft_data *data)
-{
-	pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
-		 data->data[0], data->data[1],
-		 data->data[2], data->data[3]);
-}
-
 /**
  *	struct nft_ctx - nf_tables rule/set context
  *
-- 
cgit v1.2.3


From 92ec804f3dbf0d986f8e10850bfff14f316d7aaf Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 21 Sep 2020 15:10:53 -0700
Subject: net: phy: bcm7xxx: Add an entry for BCM72113

BCM72113 features a 28nm integrated EPHY, add an entry to the driver for
it.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6ad4c000661a..d0bd226d6bd9 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -30,6 +30,7 @@
 #define PHY_ID_BCM57780			0x03625d90
 #define PHY_ID_BCM89610			0x03625cd0
 
+#define PHY_ID_BCM72113			0x35905310
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7255			0xae025120
 #define PHY_ID_BCM7260			0xae025190
-- 
cgit v1.2.3


From e1ac11859a057ddcf7d6219bd090c7483541767d Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:15 +0300
Subject: net: bridge: add src field to br_ip

Add a new src field to struct br_ip which will be used to lookup S, G
entries. When SSM option is added we will enable full br_ip lookups.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6479a38e52fa..4fb9c4954f3a 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -18,6 +18,12 @@ struct br_ip {
 		__be32	ip4;
 #if IS_ENABLED(CONFIG_IPV6)
 		struct in6_addr ip6;
+#endif
+	} src;
+	union {
+		__be32	ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr ip6;
 #endif
 	} u;
 	__be16		proto;
-- 
cgit v1.2.3


From eab3227b1240bdcc06c0a01a3fc5bfd2bc12f406 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:17 +0300
Subject: net: bridge: mcast: rename br_ip's u member to dst

Since now we have src in br_ip, u no longer makes sense so rename
it to dst. No functional changes.

v2: fix build with CONFIG_BATMAN_ADV_MCAST

CC: Marek Lindner <mareklindner@neomailbox.ch>
CC: Simon Wunderlich <sw@simonwunderlich.de>
CC: Antonio Quartulli <a@unstable.cc>
CC: Sven Eckelmann <sven@narfation.org>
CC: b.a.t.m.a.n@lists.open-mesh.org
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 4fb9c4954f3a..556caed00258 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -25,7 +25,7 @@ struct br_ip {
 #if IS_ENABLED(CONFIG_IPV6)
 		struct in6_addr ip6;
 #endif
-	} u;
+	} dst;
 	__be16		proto;
 	__u16           vid;
 };
-- 
cgit v1.2.3


From 9c4258c78a2a7624c79b797f40ae2dbfd2555e26 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:18 +0300
Subject: net: bridge: mdb: add support to extend add/del commands

Since the MDB add/del code expects an exact struct br_mdb_entry we can't
really add any extensions, thus add a new nested attribute at the level of
MDBA_SET_ENTRY called MDBA_SET_ENTRY_ATTRS which will be used to pass
all new options via netlink attributes. This patch doesn't change
anything functionally since the new attribute is not used yet, only
parsed.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 75a2ac479247..dc52f8cffa0d 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -530,10 +530,22 @@ struct br_mdb_entry {
 enum {
 	MDBA_SET_ENTRY_UNSPEC,
 	MDBA_SET_ENTRY,
+	MDBA_SET_ENTRY_ATTRS,
 	__MDBA_SET_ENTRY_MAX,
 };
 #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1)
 
+/* [MDBA_SET_ENTRY_ATTRS] = {
+ *    [MDBE_ATTR_xxx]
+ *    ...
+ * }
+ */
+enum {
+	MDBE_ATTR_UNSPEC,
+	__MDBE_ATTR_MAX,
+};
+#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
+
 /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */
 enum {
 	BRIDGE_XSTATS_UNSPEC,
-- 
cgit v1.2.3


From 88d4bd180419a7cde3947f191dc4e26fbb19f80b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:19 +0300
Subject: net: bridge: mdb: add support for add/del/dump of entries with source

Add new mdb attributes (MDBE_ATTR_SOURCE for setting,
MDBA_MDB_EATTR_SOURCE for dumping) to allow add/del and dump of mdb
entries with a source address (S,G). New S,G entries are created with
filter mode of MCAST_INCLUDE. The same attributes are used for IPv4 and
IPv6, they're validated and parsed based on their protocol.
S,G host joined entries which are added by user are not allowed yet.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index dc52f8cffa0d..3e6377c865eb 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -457,6 +457,7 @@ enum {
 	MDBA_MDB_EATTR_TIMER,
 	MDBA_MDB_EATTR_SRC_LIST,
 	MDBA_MDB_EATTR_GROUP_MODE,
+	MDBA_MDB_EATTR_SOURCE,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
@@ -542,6 +543,7 @@ enum {
  */
 enum {
 	MDBE_ATTR_UNSPEC,
+	MDBE_ATTR_SOURCE,
 	__MDBE_ATTR_MAX,
 };
 #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
-- 
cgit v1.2.3


From 8f8cb77e0b22d9044d8d57ab3bb18ea8d0474752 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:21 +0300
Subject: net: bridge: mcast: add rt_protocol field to the port group struct

We need to be able to differentiate between pg entries created by
user-space and the kernel when we start generating S,G entries for
IGMPv3/MLDv2's fast path. User-space entries are created by default as
RTPROT_STATIC and the kernel entries are RTPROT_KERNEL. Later we can
allow user-space to provide the entry rt_protocol so we can
differentiate between who added the entries specifically (e.g. clag,
admin, frr etc).

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 3e6377c865eb..1054f151078d 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -458,6 +458,7 @@ enum {
 	MDBA_MDB_EATTR_SRC_LIST,
 	MDBA_MDB_EATTR_GROUP_MODE,
 	MDBA_MDB_EATTR_SOURCE,
+	MDBA_MDB_EATTR_RTPROT,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
-- 
cgit v1.2.3


From 8266a0491e92d39dc9af739e8380a0daa9b8836b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:24 +0300
Subject: net: bridge: mcast: handle port group filter modes

We need to handle group filter mode transitions and initial state.
To change a port group's INCLUDE -> EXCLUDE mode (or when we have added
a new port group in EXCLUDE mode) we need to add that port to all of
*,G ports' S,G entries for proper replication. When the EXCLUDE state is
changed from IGMPv3 report, br_multicast_fwd_filter_exclude() must be
called after the source list processing because the assumption is that
all of the group's S,G entries will be created before transitioning to
EXCLUDE mode, i.e. most importantly its blocked entries will already be
added so it will not get automatically added to them.
The transition EXCLUDE -> INCLUDE happens only when a port group timer
expires, it requires us to remove that port from all of *,G ports' S,G
entries where it was automatically added previously.
Finally when we are adding a new S,G entry we must add all of *,G's
EXCLUDE ports to it.
In order to distinguish automatically added *,G EXCLUDE ports we have a
new port group flag - MDB_PG_FLAGS_STAR_EXCL.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 1054f151078d..e4bd30a25f6b 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -518,6 +518,7 @@ struct br_mdb_entry {
 	__u8 state;
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
 #define MDB_FLAGS_FAST_LEAVE	(1 << 1)
+#define MDB_FLAGS_STAR_EXCL	(1 << 2)
 	__u8 flags;
 	__u16 vid;
 	struct {
-- 
cgit v1.2.3


From 9116ffbf1dd71f953ffda4198d01f82d3ca16df8 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:25 +0300
Subject: net: bridge: mcast: add support for blocked port groups

When excluding S,G entries we need a way to block a particular S,G,port.
The new port group flag is managed based on the source's timer as per
RFCs 3376 and 3810. When a source expires and its port group is in
EXCLUDE mode, it will be blocked.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index e4bd30a25f6b..4c687686aa8f 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -519,6 +519,7 @@ struct br_mdb_entry {
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
 #define MDB_FLAGS_FAST_LEAVE	(1 << 1)
 #define MDB_FLAGS_STAR_EXCL	(1 << 2)
+#define MDB_FLAGS_BLOCKED	(1 << 3)
 	__u8 flags;
 	__u16 vid;
 	struct {
-- 
cgit v1.2.3


From 39097ab66dbe89b16438dd6d178d49e75cb922ed Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 23 Sep 2020 00:29:02 +0200
Subject: net: phy: Fixup kernel doc

Add missing parameter documentation, or fixup wrong parameter names.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 3 ++-
 include/linux/phy.h  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 3a88b699b758..dbd69b3d170b 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -306,7 +306,7 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising)
 /**
  * mii_10gbt_stat_mod_linkmode_lpa_t
  * @advertising: target the linkmode advertisement settings
- * @adv: value of the C45 10GBASE-T AN STATUS register
+ * @lpa: value of the C45 10GBASE-T AN STATUS register
  *
  * A small helper function that translates C45 10GBASE-T AN STATUS register bits
  * to linkmode advertisement settings. Other bits in advertising aren't changed.
@@ -371,6 +371,7 @@ struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr);
 
 /**
  * mdio_module_driver() - Helper macro for registering mdio drivers
+ * @_mdio_driver: driver to register
  *
  * Helper macro for MDIO drivers which do not do anything special in module
  * init/exit. Each module may only use this macro once, and calling it
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3a09d2bf69ea..4901b2637059 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1566,8 +1566,9 @@ static inline int mdiobus_register_board_info(const struct mdio_board_info *i,
 
 
 /**
- * module_phy_driver() - Helper macro for registering PHY drivers
+ * phy_module_driver() - Helper macro for registering PHY drivers
  * @__phy_drivers: array of PHY drivers to register
+ * @__count: Numbers of members in array
  *
  * Helper macro for PHY drivers which do not do anything special in module
  * init/exit. Each module may only use this macro once, and calling it
-- 
cgit v1.2.3


From 4069a572d423b73919ae40a500dfc4b10f8a6f32 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 23 Sep 2020 00:29:03 +0200
Subject: net: phy: Document core PHY structures

Add kerneldoc for the core PHY data structures, a few inline functions
and exported functions which are not already documented.

v2
Typos
g/phy/PHY/s

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 423 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 292 insertions(+), 131 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4901b2637059..eb3cb1a98b45 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -82,7 +82,39 @@ extern const int phy_10gbit_features_array[1];
 #define PHY_POLL_CABLE_TEST	0x00000004
 #define MDIO_DEVICE_IS_PHY	0x80000000
 
-/* Interface Mode definitions */
+/**
+ * enum phy_interface_t - Interface Mode definitions
+ *
+ * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch
+ * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined
+ * @PHY_INTERFACE_MODE_MII: Median-independent interface
+ * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface
+ * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface
+ * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface
+ * @PHY_INTERFACE_MODE_RMII: Reduced Media Independent Interface
+ * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay
+ * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay
+ * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay
+ * @PHY_INTERFACE_MODE_RTBI: Reduced TBI
+ * @PHY_INTERFACE_MODE_SMII: ??? MII
+ * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
+ * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII
+ * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII
+ * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX
+ * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX
+ * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI
+ * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface
+ * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR
+ * @PHY_INTERFACE_MODE_USXGMII:  Universal Serial 10GE MII
+ * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
+ * @PHY_INTERFACE_MODE_MAX: Book keeping
+ *
+ * Describes the interface between the MAC and PHY.
+ */
 typedef enum {
 	PHY_INTERFACE_MODE_NA,
 	PHY_INTERFACE_MODE_INTERNAL,
@@ -116,8 +148,8 @@ typedef enum {
 } phy_interface_t;
 
 /**
- * phy_supported_speeds - return all speeds currently supported by a phy device
- * @phy: The phy device to return supported speeds of.
+ * phy_supported_speeds - return all speeds currently supported by a PHY device
+ * @phy: The PHY device to return supported speeds of.
  * @speeds: buffer to store supported speeds in.
  * @size: size of speeds buffer.
  *
@@ -134,9 +166,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
  * phy_modes - map phy_interface_t enum to device tree binding of phy-mode
  * @interface: enum phy_interface_t value
  *
- * Description: maps 'enum phy_interface_t' defined in this file
+ * Description: maps enum &phy_interface_t defined in this file
  * into the device tree binding of 'phy-mode', so that Ethernet
- * device driver can get phy interface from device tree.
+ * device driver can get PHY interface from device tree.
  */
 static inline const char *phy_modes(phy_interface_t interface)
 {
@@ -215,6 +247,14 @@ struct sfp_bus;
 struct sfp_upstream_ops;
 struct sk_buff;
 
+/**
+ * struct mdio_bus_stats - Statistics counters for MDIO busses
+ * @transfers: Total number of transfers, i.e. @writes + @reads
+ * @errors: Number of MDIO transfers that returned an error
+ * @writes: Number of write transfers
+ * @reads: Number of read transfers
+ * @syncp: Synchronisation for incrementing statistics
+ */
 struct mdio_bus_stats {
 	u64_stats_t transfers;
 	u64_stats_t errors;
@@ -224,7 +264,15 @@ struct mdio_bus_stats {
 	struct u64_stats_sync syncp;
 };
 
-/* Represents a shared structure between different phydev's in the same
+/**
+ * struct phy_package_shared - Shared information in PHY packages
+ * @addr: Common PHY address used to combine PHYs in one package
+ * @refcnt: Number of PHYs connected to this shared data
+ * @flags: Initialization of PHY package
+ * @priv_size: Size of the shared private data @priv
+ * @priv: Driver private data shared across a PHY package
+ *
+ * Represents a shared structure between different phydev's in the same
  * package, for example a quad PHY. See phy_package_join() and
  * phy_package_leave().
  */
@@ -247,7 +295,14 @@ struct phy_package_shared {
 #define PHY_SHARED_F_INIT_DONE  0
 #define PHY_SHARED_F_PROBE_DONE 1
 
-/*
+/**
+ * struct mii_bus - Represents an MDIO bus
+ *
+ * @owner: Who owns this device
+ * @name: User friendly name for this MDIO device, or driver name
+ * @id: Unique identifier for this bus, typical from bus hierarchy
+ * @priv: Driver private data
+ *
  * The Bus class for PHYs.  Devices which provide access to
  * PHYs should register using this structure
  */
@@ -256,49 +311,58 @@ struct mii_bus {
 	const char *name;
 	char id[MII_BUS_ID_SIZE];
 	void *priv;
+	/** @read: Perform a read transfer on the bus */
 	int (*read)(struct mii_bus *bus, int addr, int regnum);
+	/** @write: Perform a write transfer on the bus */
 	int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val);
+	/** @reset: Perform a reset of the bus */
 	int (*reset)(struct mii_bus *bus);
+
+	/** @stats: Statistic counters per device on the bus */
 	struct mdio_bus_stats stats[PHY_MAX_ADDR];
 
-	/*
-	 * A lock to ensure that only one thing can read/write
+	/**
+	 * @mdio_lock: A lock to ensure that only one thing can read/write
 	 * the MDIO bus at a time
 	 */
 	struct mutex mdio_lock;
 
+	/** @parent: Parent device of this bus */
 	struct device *parent;
+	/** @state: State of bus structure */
 	enum {
 		MDIOBUS_ALLOCATED = 1,
 		MDIOBUS_REGISTERED,
 		MDIOBUS_UNREGISTERED,
 		MDIOBUS_RELEASED,
 	} state;
+
+	/** @dev: Kernel device representation */
 	struct device dev;
 
-	/* list of all PHYs on bus */
+	/** @mdio_map: list of all MDIO devices on bus */
 	struct mdio_device *mdio_map[PHY_MAX_ADDR];
 
-	/* PHY addresses to be ignored when probing */
+	/** @phy_mask: PHY addresses to be ignored when probing */
 	u32 phy_mask;
 
-	/* PHY addresses to ignore the TA/read failure */
+	/** @phy_ignore_ta_mask: PHY addresses to ignore the TA/read failure */
 	u32 phy_ignore_ta_mask;
 
-	/*
-	 * An array of interrupts, each PHY's interrupt at the index
+	/**
+	 * @irq: An array of interrupts, each PHY's interrupt at the index
 	 * matching its address
 	 */
 	int irq[PHY_MAX_ADDR];
 
-	/* GPIO reset pulse width in microseconds */
+	/** @reset_delay_us: GPIO reset pulse width in microseconds */
 	int reset_delay_us;
-	/* GPIO reset deassert delay in microseconds */
+	/** @reset_post_delay_us: GPIO reset deassert delay in microseconds */
 	int reset_post_delay_us;
-	/* RESET GPIO descriptor pointer */
+	/** @reset_gpiod: Reset GPIO descriptor pointer */
 	struct gpio_desc *reset_gpiod;
 
-	/* bus capabilities, used for probing */
+	/** @probe_capabilities: bus capabilities, used for probing */
 	enum {
 		MDIOBUS_NO_CAP = 0,
 		MDIOBUS_C22,
@@ -306,15 +370,22 @@ struct mii_bus {
 		MDIOBUS_C22_C45,
 	} probe_capabilities;
 
-	/* protect access to the shared element */
+	/** @shared_lock: protect access to the shared element */
 	struct mutex shared_lock;
 
-	/* shared state across different PHYs */
+	/** @shared: shared state across different PHYs */
 	struct phy_package_shared *shared[PHY_MAX_ADDR];
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
-struct mii_bus *mdiobus_alloc_size(size_t);
+struct mii_bus *mdiobus_alloc_size(size_t size);
+
+/**
+ * mdiobus_alloc - Allocate an MDIO bus structure
+ *
+ * The internal state of the MDIO bus will be set of MDIOBUS_ALLOCATED ready
+ * for the driver to register the bus.
+ */
 static inline struct mii_bus *mdiobus_alloc(void)
 {
 	return mdiobus_alloc_size(0);
@@ -341,40 +412,41 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
 #define PHY_INTERRUPT_DISABLED	false
 #define PHY_INTERRUPT_ENABLED	true
 
-/* PHY state machine states:
+/**
+ * enum phy_state - PHY state machine states:
  *
- * DOWN: PHY device and driver are not ready for anything.  probe
+ * @PHY_DOWN: PHY device and driver are not ready for anything.  probe
  * should be called if and only if the PHY is in this state,
  * given that the PHY device exists.
- * - PHY driver probe function will set the state to READY
+ * - PHY driver probe function will set the state to @PHY_READY
  *
- * READY: PHY is ready to send and receive packets, but the
+ * @PHY_READY: PHY is ready to send and receive packets, but the
  * controller is not.  By default, PHYs which do not implement
  * probe will be set to this state by phy_probe().
  * - start will set the state to UP
  *
- * UP: The PHY and attached device are ready to do work.
+ * @PHY_UP: The PHY and attached device are ready to do work.
  * Interrupts should be started here.
- * - timer moves to NOLINK or RUNNING
+ * - timer moves to @PHY_NOLINK or @PHY_RUNNING
  *
- * NOLINK: PHY is up, but not currently plugged in.
- * - irq or timer will set RUNNING if link comes back
- * - phy_stop moves to HALTED
+ * @PHY_NOLINK: PHY is up, but not currently plugged in.
+ * - irq or timer will set @PHY_RUNNING if link comes back
+ * - phy_stop moves to @PHY_HALTED
  *
- * RUNNING: PHY is currently up, running, and possibly sending
+ * @PHY_RUNNING: PHY is currently up, running, and possibly sending
  * and/or receiving packets
- * - irq or timer will set NOLINK if link goes down
- * - phy_stop moves to HALTED
+ * - irq or timer will set @PHY_NOLINK if link goes down
+ * - phy_stop moves to @PHY_HALTED
  *
- * CABLETEST: PHY is performing a cable test. Packet reception/sending
+ * @PHY_CABLETEST: PHY is performing a cable test. Packet reception/sending
  * is not expected to work, carrier will be indicated as down. PHY will be
  * poll once per second, or on interrupt for it current state.
  * Once complete, move to UP to restart the PHY.
- * - phy_stop aborts the running test and moves to HALTED
+ * - phy_stop aborts the running test and moves to @PHY_HALTED
  *
- * HALTED: PHY is up, but no polling or interrupts are done. Or
+ * @PHY_HALTED: PHY is up, but no polling or interrupts are done. Or
  * PHY is in an error state.
- * - phy_start moves to UP
+ * - phy_start moves to @PHY_UP
  */
 enum phy_state {
 	PHY_DOWN = 0,
@@ -403,34 +475,67 @@ struct phy_c45_device_ids {
 struct macsec_context;
 struct macsec_ops;
 
-/* phy_device: An instance of a PHY
+/**
+ * struct phy_device - An instance of a PHY
  *
- * drv: Pointer to the driver for this PHY instance
- * phy_id: UID for this device found during discovery
- * c45_ids: 802.3-c45 Device Identifers if is_c45.
- * is_c45:  Set to true if this phy uses clause 45 addressing.
- * is_internal: Set to true if this phy is internal to a MAC.
- * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
- * is_gigabit_capable: Set to true if PHY supports 1000Mbps
- * has_fixups: Set to true if this phy has fixups/quirks.
- * suspended: Set to true if this phy has been suspended successfully.
- * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus.
- * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
- * loopback_enabled: Set true if this phy has been loopbacked successfully.
- * downshifted_rate: Set true if link speed has been downshifted.
- * state: state of the PHY for management purposes
- * dev_flags: Device-specific flags used by the PHY driver.
- * irq: IRQ number of the PHY's interrupt (-1 if none)
- * phy_timer: The timer for handling the state machine
- * sfp_bus_attached: flag indicating whether the SFP bus has been attached
- * sfp_bus: SFP bus attached to this PHY's fiber port
- * attached_dev: The attached enet driver's device instance ptr
- * adjust_link: Callback for the enet controller to respond to
- * changes in the link state.
- * macsec_ops: MACsec offloading ops.
+ * @mdio: MDIO bus this PHY is on
+ * @drv: Pointer to the driver for this PHY instance
+ * @phy_id: UID for this device found during discovery
+ * @c45_ids: 802.3-c45 Device Identifiers if is_c45.
+ * @is_c45:  Set to true if this PHY uses clause 45 addressing.
+ * @is_internal: Set to true if this PHY is internal to a MAC.
+ * @is_pseudo_fixed_link: Set to true if this PHY is an Ethernet switch, etc.
+ * @is_gigabit_capable: Set to true if PHY supports 1000Mbps
+ * @has_fixups: Set to true if this PHY has fixups/quirks.
+ * @suspended: Set to true if this PHY has been suspended successfully.
+ * @suspended_by_mdio_bus: Set to true if this PHY was suspended by MDIO bus.
+ * @sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * @loopback_enabled: Set true if this PHY has been loopbacked successfully.
+ * @downshifted_rate: Set true if link speed has been downshifted.
+ * @state: State of the PHY for management purposes
+ * @dev_flags: Device-specific flags used by the PHY driver.
+ * @irq: IRQ number of the PHY's interrupt (-1 if none)
+ * @phy_timer: The timer for handling the state machine
+ * @phylink: Pointer to phylink instance for this PHY
+ * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached
+ * @sfp_bus: SFP bus attached to this PHY's fiber port
+ * @attached_dev: The attached enet driver's device instance ptr
+ * @adjust_link: Callback for the enet controller to respond to changes: in the
+ *               link state.
+ * @phy_link_change: Callback for phylink for notification of link change
+ * @macsec_ops: MACsec offloading ops.
  *
- * speed, duplex, pause, supported, advertising, lp_advertising,
- * and autoneg are used like in mii_if_info
+ * @speed: Current link speed
+ * @duplex: Current duplex
+ * @pause: Current pause
+ * @asym_pause: Current asymmetric pause
+ * @supported: Combined MAC/PHY supported linkmodes
+ * @advertising: Currently advertised linkmodes
+ * @adv_old: Saved advertised while power saving for WoL
+ * @lp_advertising: Current link partner advertised linkmodes
+ * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
+ * @autoneg: Flag autoneg being used
+ * @link: Current link state
+ * @autoneg_complete: Flag auto negotiation of the link has completed
+ * @mdix: Current crossover
+ * @mdix_ctrl: User setting of crossover
+ * @interrupts: Flag interrupts have been enabled
+ * @interface: enum phy_interface_t value
+ * @skb: Netlink message for cable diagnostics
+ * @nest: Netlink nest used for cable diagnostics
+ * @ehdr: nNtlink header for cable diagnostics
+ * @phy_led_triggers: Array of LED triggers
+ * @phy_num_led_triggers: Number of triggers in @phy_led_triggers
+ * @led_link_trigger: LED trigger for link up/down
+ * @last_triggered: last LED trigger for link speed
+ * @master_slave_set: User requested master/slave configuration
+ * @master_slave_get: Current master/slave advertisement
+ * @master_slave_state: Current master/slave configuration
+ * @mii_ts: Pointer to time stamper callbacks
+ * @lock:  Mutex for serialization access to PHY
+ * @state_queue: Work queue for state machine
+ * @shared: Pointer to private data shared by phys in one package
+ * @priv: Pointer to driver private data
  *
  * interrupts currently only supports enabled or disabled,
  * but could be changed in the future to support enabling
@@ -550,9 +655,18 @@ struct phy_device {
 #define to_phy_device(d) container_of(to_mdio_device(d), \
 				      struct phy_device, mdio)
 
-/* A structure containing possible configuration parameters
+/**
+ * struct phy_tdr_config - Configuration of a TDR raw test
+ *
+ * @first: Distance for first data collection point
+ * @last: Distance for last data collection point
+ * @step: Step between data collection points
+ * @pair: Bitmap of cable pairs to collect data for
+ *
+ * A structure containing possible configuration parameters
  * for a TDR cable test. The driver does not need to implement
  * all the parameters, but should report what is actually used.
+ * All distances are in centimeters.
  */
 struct phy_tdr_config {
 	u32 first;
@@ -562,18 +676,20 @@ struct phy_tdr_config {
 };
 #define PHY_PAIR_ALL -1
 
-/* struct phy_driver: Driver structure for a particular PHY type
+/**
+ * struct phy_driver - Driver structure for a particular PHY type
  *
- * driver_data: static driver data
- * phy_id: The result of reading the UID registers of this PHY
+ * @mdiodrv: Data common to all MDIO devices
+ * @phy_id: The result of reading the UID registers of this PHY
  *   type, and ANDing them with the phy_id_mask.  This driver
  *   only works for PHYs with IDs which match this field
- * name: The friendly name of this PHY type
- * phy_id_mask: Defines the important bits of the phy_id
- * features: A mandatory list of features (speed, duplex, etc)
+ * @name: The friendly name of this PHY type
+ * @phy_id_mask: Defines the important bits of the phy_id
+ * @features: A mandatory list of features (speed, duplex, etc)
  *   supported by this PHY
- * flags: A bitfield defining certain other features this PHY
+ * @flags: A bitfield defining certain other features this PHY
  *   supports (like interrupts)
+ * @driver_data: Static driver data
  *
  * All functions are optional. If config_aneg or read_status
  * are not implemented, the phy core uses the genphy versions.
@@ -592,151 +708,178 @@ struct phy_driver {
 	u32 flags;
 	const void *driver_data;
 
-	/*
-	 * Called to issue a PHY software reset
+	/**
+	 * @soft_reset: Called to issue a PHY software reset
 	 */
 	int (*soft_reset)(struct phy_device *phydev);
 
-	/*
-	 * Called to initialize the PHY,
+	/**
+	 * @config_init: Called to initialize the PHY,
 	 * including after a reset
 	 */
 	int (*config_init)(struct phy_device *phydev);
 
-	/*
-	 * Called during discovery.  Used to set
+	/**
+	 * @probe: Called during discovery.  Used to set
 	 * up device-specific structures, if any
 	 */
 	int (*probe)(struct phy_device *phydev);
 
-	/*
-	 * Probe the hardware to determine what abilities it has.
-	 * Should only set phydev->supported.
+	/**
+	 * @get_features: Probe the hardware to determine what
+	 * abilities it has.  Should only set phydev->supported.
 	 */
 	int (*get_features)(struct phy_device *phydev);
 
 	/* PHY Power Management */
+	/** @suspend: Suspend the hardware, saving state if needed */
 	int (*suspend)(struct phy_device *phydev);
+	/** @resume: Resume the hardware, restoring state if needed */
 	int (*resume)(struct phy_device *phydev);
 
-	/*
-	 * Configures the advertisement and resets
+	/**
+	 * @config_aneg: Configures the advertisement and resets
 	 * autonegotiation if phydev->autoneg is on,
 	 * forces the speed to the current settings in phydev
 	 * if phydev->autoneg is off
 	 */
 	int (*config_aneg)(struct phy_device *phydev);
 
-	/* Determines the auto negotiation result */
+	/** @aneg_done: Determines the auto negotiation result */
 	int (*aneg_done)(struct phy_device *phydev);
 
-	/* Determines the negotiated speed and duplex */
+	/** @read_status: Determines the negotiated speed and duplex */
 	int (*read_status)(struct phy_device *phydev);
 
-	/* Clears any pending interrupts */
+	/** @ack_interrupt: Clears any pending interrupts */
 	int (*ack_interrupt)(struct phy_device *phydev);
 
-	/* Enables or disables interrupts */
+	/** @config_intr: Enables or disables interrupts */
 	int (*config_intr)(struct phy_device *phydev);
 
-	/*
-	 * Checks if the PHY generated an interrupt.
+	/**
+	 * @did_interrupt: Checks if the PHY generated an interrupt.
 	 * For multi-PHY devices with shared PHY interrupt pin
 	 * Set interrupt bits have to be cleared.
 	 */
 	int (*did_interrupt)(struct phy_device *phydev);
 
-	/* Override default interrupt handling */
+	/** @handle_interrupt: Override default interrupt handling */
 	irqreturn_t (*handle_interrupt)(struct phy_device *phydev);
 
-	/* Clears up any memory if needed */
+	/** @remove: Clears up any memory if needed */
 	void (*remove)(struct phy_device *phydev);
 
-	/* Returns true if this is a suitable driver for the given
-	 * phydev.  If NULL, matching is based on phy_id and
-	 * phy_id_mask.
+	/**
+	 * @match_phy_device: Returns true if this is a suitable
+	 * driver for the given phydev.	 If NULL, matching is based on
+	 * phy_id and phy_id_mask.
 	 */
 	int (*match_phy_device)(struct phy_device *phydev);
 
-	/* Some devices (e.g. qnap TS-119P II) require PHY register changes to
-	 * enable Wake on LAN, so set_wol is provided to be called in the
-	 * ethernet driver's set_wol function. */
+	/**
+	 * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY
+	 * register changes to enable Wake on LAN, so set_wol is
+	 * provided to be called in the ethernet driver's set_wol
+	 * function.
+	 */
 	int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
 
-	/* See set_wol, but for checking whether Wake on LAN is enabled. */
+	/**
+	 * @get_wol: See set_wol, but for checking whether Wake on LAN
+	 * is enabled.
+	 */
 	void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
 
-	/*
-	 * Called to inform a PHY device driver when the core is about to
-	 * change the link state. This callback is supposed to be used as
-	 * fixup hook for drivers that need to take action when the link
-	 * state changes. Drivers are by no means allowed to mess with the
+	/**
+	 * @link_change_notify: Called to inform a PHY device driver
+	 * when the core is about to change the link state. This
+	 * callback is supposed to be used as fixup hook for drivers
+	 * that need to take action when the link state
+	 * changes. Drivers are by no means allowed to mess with the
 	 * PHY device structure in their implementations.
 	 */
 	void (*link_change_notify)(struct phy_device *dev);
 
-	/*
-	 * Phy specific driver override for reading a MMD register.
-	 * This function is optional for PHY specific drivers.  When
-	 * not provided, the default MMD read function will be used
-	 * by phy_read_mmd(), which will use either a direct read for
-	 * Clause 45 PHYs or an indirect read for Clause 22 PHYs.
-	 *  devnum is the MMD device number within the PHY device,
-	 *  regnum is the register within the selected MMD device.
+	/**
+	 * @read_mmd: PHY specific driver override for reading a MMD
+	 * register.  This function is optional for PHY specific
+	 * drivers.  When not provided, the default MMD read function
+	 * will be used by phy_read_mmd(), which will use either a
+	 * direct read for Clause 45 PHYs or an indirect read for
+	 * Clause 22 PHYs.  devnum is the MMD device number within the
+	 * PHY device, regnum is the register within the selected MMD
+	 * device.
 	 */
 	int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum);
 
-	/*
-	 * Phy specific driver override for writing a MMD register.
-	 * This function is optional for PHY specific drivers.  When
-	 * not provided, the default MMD write function will be used
-	 * by phy_write_mmd(), which will use either a direct write for
-	 * Clause 45 PHYs, or an indirect write for Clause 22 PHYs.
-	 *  devnum is the MMD device number within the PHY device,
-	 *  regnum is the register within the selected MMD device.
-	 *  val is the value to be written.
+	/**
+	 * @write_mmd: PHY specific driver override for writing a MMD
+	 * register.  This function is optional for PHY specific
+	 * drivers.  When not provided, the default MMD write function
+	 * will be used by phy_write_mmd(), which will use either a
+	 * direct write for Clause 45 PHYs, or an indirect write for
+	 * Clause 22 PHYs.  devnum is the MMD device number within the
+	 * PHY device, regnum is the register within the selected MMD
+	 * device.  val is the value to be written.
 	 */
 	int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum,
 			 u16 val);
 
+	/** @read_page: Return the current PHY register page number */
 	int (*read_page)(struct phy_device *dev);
+	/** @write_page: Set the current PHY register page number */
 	int (*write_page)(struct phy_device *dev, int page);
 
-	/* Get the size and type of the eeprom contained within a plug-in
-	 * module */
+	/**
+	 * @module_info: Get the size and type of the eeprom contained
+	 * within a plug-in module
+	 */
 	int (*module_info)(struct phy_device *dev,
 			   struct ethtool_modinfo *modinfo);
 
-	/* Get the eeprom information from the plug-in module */
+	/**
+	 * @module_eeprom: Get the eeprom information from the plug-in
+	 * module
+	 */
 	int (*module_eeprom)(struct phy_device *dev,
 			     struct ethtool_eeprom *ee, u8 *data);
 
-	/* Start a cable test */
+	/** @cable_test_start: Start a cable test */
 	int (*cable_test_start)(struct phy_device *dev);
 
-	/* Start a raw TDR cable test */
+	/**  @cable_test_tdr_start: Start a raw TDR cable test */
 	int (*cable_test_tdr_start)(struct phy_device *dev,
 				    const struct phy_tdr_config *config);
 
-	/* Once per second, or on interrupt, request the status of the
-	 * test.
+	/**
+	 * @cable_test_get_status: Once per second, or on interrupt,
+	 * request the status of the test.
 	 */
 	int (*cable_test_get_status)(struct phy_device *dev, bool *finished);
 
-	/* Get statistics from the phy using ethtool */
+	/* Get statistics from the PHY using ethtool */
+	/** @get_sset_count: Number of statistic counters */
 	int (*get_sset_count)(struct phy_device *dev);
+	/** @get_strings: Names of the statistic counters */
 	void (*get_strings)(struct phy_device *dev, u8 *data);
+	/** @get_stats: Return the statistic counter values */
 	void (*get_stats)(struct phy_device *dev,
 			  struct ethtool_stats *stats, u64 *data);
 
 	/* Get and Set PHY tunables */
+	/** @get_tunable: Return the value of a tunable */
 	int (*get_tunable)(struct phy_device *dev,
 			   struct ethtool_tunable *tuna, void *data);
+	/** @set_tunable: Set the value of a tunable */
 	int (*set_tunable)(struct phy_device *dev,
 			    struct ethtool_tunable *tuna,
 			    const void *data);
+	/** @set_loopback: Set the loopback mood of the PHY */
 	int (*set_loopback)(struct phy_device *dev, bool enable);
+	/** @get_sqi: Get the signal quality indication */
 	int (*get_sqi)(struct phy_device *dev);
+	/** @get_sqi_max: Get the maximum signal quality indication */
 	int (*get_sqi_max)(struct phy_device *dev);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
@@ -890,6 +1033,24 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum,
  */
 int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 
+/**
+ * phy_read_mmd_poll_timeout - Periodically poll a PHY register until a
+ *                             condition is met or a timeout occurs
+ *
+ * @phydev: The phy_device struct
+ * @devaddr: The MMD to read from
+ * @regnum: The register on the MMD to read
+ * @val: Variable to read the register into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0
+ *            tight-loops).  Should be less than ~20ms since usleep_range
+ *            is used (see Documentation/timers/timers-howto.rst).
+ * @timeout_us: Timeout in us, 0 means never timeout
+ * @sleep_before_read: if it is true, sleep @sleep_us before read.
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
 #define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \
 				  sleep_us, timeout_us, sleep_before_read) \
 ({ \
@@ -1161,7 +1322,7 @@ static inline bool phy_is_internal(struct phy_device *phydev)
 /**
  * phy_interface_mode_is_rgmii - Convenience function for testing if a
  * PHY interface mode is RGMII (all variants)
- * @mode: the phy_interface_t enum
+ * @mode: the &phy_interface_t enum
  */
 static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
 {
@@ -1170,11 +1331,11 @@ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
 };
 
 /**
- * phy_interface_mode_is_8023z() - does the phy interface mode use 802.3z
+ * phy_interface_mode_is_8023z() - does the PHY interface mode use 802.3z
  *   negotiation
  * @mode: one of &enum phy_interface_t
  *
- * Returns true if the phy interface mode uses the 16-bit negotiation
+ * Returns true if the PHY interface mode uses the 16-bit negotiation
  * word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding)
  */
 static inline bool phy_interface_mode_is_8023z(phy_interface_t mode)
@@ -1193,7 +1354,7 @@ static inline bool phy_interface_is_rgmii(struct phy_device *phydev)
 	return phy_interface_mode_is_rgmii(phydev->interface);
 };
 
-/*
+/**
  * phy_is_pseudo_fixed_link - Convenience function for testing if this
  * PHY is the CPU port facing side of an Ethernet switch, or similar.
  * @phydev: the phy_device struct
-- 
cgit v1.2.3


From c9e7c76d70fa50582ca96759829c93d0dd024662 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 21 Sep 2020 15:36:51 +0100
Subject: xfrm: Provide API to register translator module

Add a skeleton for xfrm_compat module and provide API to register it in
xfrm_state.ko. struct xfrm_translator will have function pointers to
translate messages received from 32-bit userspace or to be sent to it
from 64-bit kernel.
module_get()/module_put() are used instead of rcu_read_lock() as the
module will vmalloc() memory for translation.
The new API is registered with xfrm_state module, not with xfrm_user as
the former needs translator for user_policy set by setsockopt() and
xfrm_user already uses functions from xfrm_state.

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2737d24ec244..fe2e3717da14 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2000,6 +2000,25 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 	return 0;
 }
 
+struct xfrm_translator {
+	struct module *owner;
+};
+
+#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
+extern int xfrm_register_translator(struct xfrm_translator *xtr);
+extern int xfrm_unregister_translator(struct xfrm_translator *xtr);
+extern struct xfrm_translator *xfrm_get_translator(void);
+extern void xfrm_put_translator(struct xfrm_translator *xtr);
+#else
+static inline struct xfrm_translator *xfrm_get_translator(void)
+{
+	return NULL;
+}
+static inline void xfrm_put_translator(struct xfrm_translator *xtr)
+{
+}
+#endif
+
 #if IS_ENABLED(CONFIG_IPV6)
 static inline bool xfrm6_local_dontfrag(const struct sock *sk)
 {
-- 
cgit v1.2.3


From 5461fc0c8d9f23956b99f5907f69726a293ccb67 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 21 Sep 2020 15:36:52 +0100
Subject: xfrm/compat: Add 64=>32-bit messages translator

Provide the kernel-to-user translator under XFRM_USER_COMPAT, that
creates for 64-bit xfrm-user message a 32-bit translation and puts it
in skb's frag_list. net/compat.c layer provides MSG_CMSG_COMPAT to
decide if the message should be taken from skb or frag_list.
(used by wext-core which has also an ABI difference)

Kernel sends 64-bit xfrm messages to the userspace for:
- multicast (monitor events)
- netlink dumps

Wire up the translator to xfrm_nlmsg_multicast().

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fe2e3717da14..5b6cc62c9354 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2000,7 +2000,12 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 	return 0;
 }
 
+extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
+
 struct xfrm_translator {
+	/* Allocate frag_list and put compat translation there */
+	int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);
+
 	struct module *owner;
 };
 
-- 
cgit v1.2.3


From 5106f4a8acff480e244300bc5097c0ad7048c3a2 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 21 Sep 2020 15:36:55 +0100
Subject: xfrm/compat: Add 32=>64-bit messages translator

Provide the user-to-kernel translator under XFRM_USER_COMPAT, that
creates for 32-bit xfrm-user message a 64-bit translation.
The translation is afterwards reused by xfrm_user code just as if
userspace had sent 64-bit message.

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5b6cc62c9354..fa18cb6bb3f7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2001,11 +2001,17 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 }
 
 extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
+extern const struct nla_policy xfrma_policy[XFRMA_MAX+1];
 
 struct xfrm_translator {
 	/* Allocate frag_list and put compat translation there */
 	int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);
 
+	/* Allocate nlmsg with 64-bit translaton of received 32-bit message */
+	struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh,
+			int maxtype, const struct nla_policy *policy,
+			struct netlink_ext_ack *extack);
+
 	struct module *owner;
 };
 
-- 
cgit v1.2.3


From 96392ee5a13b992563cfe07d23ee30d333b89126 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 21 Sep 2020 15:36:56 +0100
Subject: xfrm/compat: Translate 32-bit user_policy from sockptr

Provide compat_xfrm_userpolicy_info translation for xfrm setsocketopt().
Reallocate buffer and put the missing padding for 64-bit message.

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fa18cb6bb3f7..53618a31634b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2012,6 +2012,9 @@ struct xfrm_translator {
 			int maxtype, const struct nla_policy *policy,
 			struct netlink_ext_ack *extack);
 
+	/* Translate 32-bit user_policy from sockptr */
+	int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);
+
 	struct module *owner;
 };
 
-- 
cgit v1.2.3


From e2f9a8fe73d3a29edfdb4215e7596c95b6db362d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 23 Sep 2020 14:24:20 +0300
Subject: net: mscc: ocelot: always pass skb clone to
 ocelot_port_add_txtstamp_skb

Currently, ocelot switchdev passes the skb directly to the function that
enqueues it to the list of skb's awaiting a TX timestamp. Whereas the
felix DSA driver first clones the skb, then passes the clone to this
queue.

This matters because in the case of felix, the common IRQ handler, which
is ocelot_get_txtstamp(), currently clones the clone, and frees the
original clone. This is useless and can be simplified by using
skb_complete_tx_timestamp() instead of skb_tstamp_tx().

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 0ac4e7fba086..3105bbb6cdcf 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -710,8 +710,8 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
 int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
 int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr);
 int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr);
-int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port,
-				 struct sk_buff *skb);
+void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
+				  struct sk_buff *clone);
 void ocelot_get_txtstamp(struct ocelot *ocelot);
 void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu);
 int ocelot_get_max_mtu(struct ocelot *ocelot, int port);
-- 
cgit v1.2.3


From b5b6775d72e8662b5ea7f892bf06db4831e2c1aa Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 23 Sep 2020 18:41:22 +0300
Subject: of: add of_mdio_find_device() api

Add a helper function which finds the mdio_device structure given a
device tree node. This is helpful for finding the PCS device based on a
DTS node but managing it as a mdio_device instead of a phy_device.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_mdio.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 1efb88d9f892..cfe8c607a628 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -17,6 +17,7 @@ bool of_mdiobus_child_is_phy(struct device_node *child);
 int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
 int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
 			     struct device_node *np);
+struct mdio_device *of_mdio_find_device(struct device_node *np);
 struct phy_device *of_phy_find_device(struct device_node *phy_np);
 struct phy_device *
 of_phy_connect(struct net_device *dev, struct device_node *phy_np,
@@ -74,6 +75,11 @@ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *
 	return mdiobus_register(mdio);
 }
 
+static inline struct mdio_device *of_mdio_find_device(struct device_node *np)
+{
+	return NULL;
+}
+
 static inline struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
 	return NULL;
-- 
cgit v1.2.3


From 08b81d873126b413cda511b1ea1cbb0e99938bbd Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 24 Sep 2020 08:30:01 +0800
Subject: mptcp: add sk_stop_timer_sync helper

This patch added a new helper sk_stop_timer_sync, it deactivates a timer
like sk_stop_timer, but waits for the handler to finish.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index eaa5cac5e836..a5c6ae78df77 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2195,6 +2195,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer);
+
 int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
 			struct sk_buff *skb, unsigned int flags,
 			void (*destructor)(struct sock *sk,
-- 
cgit v1.2.3


From 77d0cab93909edf1e5740643d788ac2e458b8187 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 25 Sep 2020 01:23:02 +0200
Subject: net: tcp: drop unused function argument from mptcp_incoming_options

Since commit cfde141ea3faa30e ("mptcp: move option parsing into
mptcp_incoming_options()"), the 3rd function argument is no longer used.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 3525d2822abe..753ba7e755d6 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -85,8 +85,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts);
-void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
-			    struct tcp_options_received *opt_rx);
+void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
 
 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
 
@@ -185,8 +184,7 @@ static inline bool mptcp_established_options(struct sock *sk,
 }
 
 static inline void mptcp_incoming_options(struct sock *sk,
-					  struct sk_buff *skb,
-					  struct tcp_options_received *opt_rx)
+					  struct sk_buff *skb)
 {
 }
 
-- 
cgit v1.2.3


From f19425641cb2572a33cb074d5e30283720bd4d22 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 6 Aug 2020 11:17:12 -0700
Subject: Bluetooth: L2CAP: Fix calling sk_filter on non-socket based channel

Only sockets will have the chan->data set to an actual sk, channels
like A2MP would have its own data which would likely cause a crash when
calling sk_filter, in order to fix this a new callback has been
introduced so channels can implement their own filtering if necessary.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 8f1e6a7a2df8..1d1232917de7 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -665,6 +665,8 @@ struct l2cap_ops {
 	struct sk_buff		*(*alloc_skb) (struct l2cap_chan *chan,
 					       unsigned long hdr_len,
 					       unsigned long len, int nb);
+	int			(*filter) (struct l2cap_chan * chan,
+					   struct sk_buff *skb);
 };
 
 struct l2cap_conn {
-- 
cgit v1.2.3


From 1df8f55a37bd286a3d40192980050bc3d7d78887 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:03:50 -0700
Subject: bpf: Enable bpf_skc_to_* sock casting helper to networking prog type

There is a constant need to add more fields into the bpf_tcp_sock
for the bpf programs running at tc, sock_ops...etc.

A current workaround could be to use bpf_probe_read_kernel().  However,
other than making another helper call for reading each field and missing
CO-RE, it is also not as intuitive to use as directly reading
"tp->lsndtime" for example.  While already having perfmon cap to do
bpf_probe_read_kernel(), it will be much easier if the bpf prog can
directly read from the tcp_sock.

This patch tries to do that by using the existing casting-helpers
bpf_skc_to_*() whose func_proto returns a btf_id.  For example, the
func_proto of bpf_skc_to_tcp_sock returns the btf_id of the
kernel "struct tcp_sock".

These helpers are also added to is_ptr_cast_function().
It ensures the returning reg (BPF_REF_0) will also carries the ref_obj_id.
That will keep the ref-tracking works properly.

The bpf_skc_to_* helpers are made available to most of the bpf prog
types in filter.c. The bpf_skc_to_* helpers will be limited by
perfmon cap.

This patch adds a ARG_PTR_TO_BTF_ID_SOCK_COMMON.  The helper accepting
this arg can accept a btf-id-ptr (PTR_TO_BTF_ID + &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON])
or a legacy-ctx-convert-skc-ptr (PTR_TO_SOCK_COMMON).  The bpf_skc_to_*()
helpers are changed to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that
they will accept pointer obtained from skb->sk.

Instead of specifying both arg_type and arg_btf_id in the same func_proto
which is how the current ARG_PTR_TO_BTF_ID does, the arg_btf_id of
the new ARG_PTR_TO_BTF_ID_SOCK_COMMON is specified in the
compatible_reg_types[] in verifier.c.  The reason is the arg_btf_id is
always the same.  Discussion in this thread:
https://lore.kernel.org/bpf/20200922070422.1917351-1-kafai@fb.com/

The ARG_PTR_TO_BTF_ID_ part gives a clear expectation that the helper is
expecting a PTR_TO_BTF_ID which could be NULL.  This is the same
behavior as the existing helper taking ARG_PTR_TO_BTF_ID.

The _SOCK_COMMON part means the helper is also expecting the legacy
SOCK_COMMON pointer.

By excluding the _OR_NULL part, the bpf prog cannot call helper
with a literal NULL which doesn't make sense in most cases.
e.g. bpf_skc_to_tcp_sock(NULL) will be rejected.  All PTR_TO_*_OR_NULL
reg has to do a NULL check first before passing into the helper or else
the bpf prog will be rejected.  This behavior is nothing new and
consistent with the current expectation during bpf-prog-load.

[ ARG_PTR_TO_BTF_ID_SOCK_COMMON will be used to replace
  ARG_PTR_TO_SOCK* of other existing helpers later such that
  those existing helpers can take the PTR_TO_BTF_ID returned by
  the bpf_skc_to_*() helpers.

  The only special case is bpf_sk_lookup_assign() which can accept a
  literal NULL ptr.  It has to be handled specially in another follow
  up patch if there is a need (e.g. by renaming ARG_PTR_TO_SOCKET_OR_NULL
  to ARG_PTR_TO_BTF_ID_SOCK_COMMON_OR_NULL). ]

[ When converting the older helpers that take ARG_PTR_TO_SOCK* in
  the later patch, if the kernel does not support BTF,
  ARG_PTR_TO_BTF_ID_SOCK_COMMON will behave like ARG_PTR_TO_SOCK_COMMON
  because no reg->type could have PTR_TO_BTF_ID in this case.

  It is not a concern for the newer-btf-only helper like the bpf_skc_to_*()
  here though because these helpers must require BTF vmlinux to begin
  with. ]

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200925000350.3855720-1-kafai@fb.com
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fc5c901c7542..d0937f1d2980 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -292,6 +292,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
+	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	__BPF_ARG_TYPE_MAX,
 };
 
-- 
cgit v1.2.3


From a5fa25adf03d4b063aece74ba70ccbb3a71af122 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:03:56 -0700
Subject: bpf: Change bpf_sk_release and bpf_sk_*cgroup_id to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

The previous patch allows the networking bpf prog to use the
bpf_skc_to_*() helpers to get a PTR_TO_BTF_ID socket pointer,
e.g. "struct tcp_sock *".  It allows the bpf prog to read all the
fields of the tcp_sock.

This patch changes the bpf_sk_release() and bpf_sk_*cgroup_id()
to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will
work with the pointer returned by the bpf_skc_to_*() helpers
also.  For example, the following will work:

	sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return;
	tp = bpf_skc_to_tcp_sock(sk);
	if (!tp) {
		bpf_sk_release(sk);
		return;
	}
	lsndtime = tp->lsndtime;
	/* Pass tp to bpf_sk_release() will also work */
	bpf_sk_release(tp);

Since PTR_TO_BTF_ID could be NULL, the helper taking
ARG_PTR_TO_BTF_ID_SOCK_COMMON has to check for NULL at runtime.

A btf_id of "struct sock" may not always mean a fullsock.  Regardless
the helper's running context may get a non-fullsock or not,
considering fullsock check/handling is pretty cheap, it is better to
keep the same verifier expectation on helper that takes ARG_PTR_TO_BTF_ID*
will be able to handle the minisock situation.  In the bpf_sk_*cgroup_id()
case,  it will try to get a fullsock by using sk_to_full_sk() as its
skb variant bpf_sk"b"_*cgroup_id() has already been doing.

bpf_sk_release can already handle minisock, so nothing special has to
be done.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200925000356.3856047-1-kafai@fb.com
---
 include/uapi/linux/bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a22812561064..c96a56d9c3be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2512,7 +2512,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
  *	Description
  *		Release the reference held by *sock*. *sock* must be a
  *		non-**NULL** pointer that was returned from
@@ -3234,11 +3234,11 @@ union bpf_attr {
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
  *	Description
  *		Return the cgroup v2 id of the socket *sk*.
  *
- *		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *		*sk* must be a non-**NULL** pointer to a socket, e.g. one
  *		returned from **bpf_sk_lookup_xxx**\ (),
  *		**bpf_sk_fullsock**\ (), etc. The format of returned id is
  *		same as in **bpf_skb_cgroup_id**\ ().
@@ -3248,7 +3248,7 @@ union bpf_attr {
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
  *	Description
  *		Return id of cgroup v2 that is ancestor of cgroup associated
  *		with the *sk* at the *ancestor_level*.  The root cgroup is at
-- 
cgit v1.2.3


From 592a3498648af000e93dff2d36229ab11cd8c7f6 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:02 -0700
Subject: bpf: Change bpf_sk_storage_*() to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_sk_storage_*() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer
returned by the bpf_skc_to_*() helpers also.

A micro benchmark has been done on a "cgroup_skb/egress" bpf program
which does a bpf_sk_storage_get().  It was driven by netperf doing
a 4096 connected UDP_STREAM test with 64bytes packet.
The stats from "kernel.bpf_stats_enabled" shows no meaningful difference.

The sk_storage_get_btf_proto, sk_storage_delete_btf_proto,
btf_sk_storage_get_proto, and btf_sk_storage_delete_proto are
no longer needed, so they are removed.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20200925000402.3856307-1-kafai@fb.com
---
 include/net/bpf_sk_storage.h | 2 --
 include/uapi/linux/bpf.h     | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 119f4c9c3a9c..3c516dd07caf 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
-extern const struct bpf_func_proto sk_storage_get_btf_proto;
-extern const struct bpf_func_proto sk_storage_delete_btf_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c96a56d9c3be..0ec6dbeb17a5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2861,6 +2861,7 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
+ *		**-EINVAL** if sk is not a fullsock (e.g. a request_sock).
  *
  * long bpf_send_signal(u32 sig)
  *	Description
-- 
cgit v1.2.3


From c0df236e1394970f3503a8fb103de95d000014ca Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:09 -0700
Subject: bpf: Change bpf_tcp_*_syncookie to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_tcp_*_syncookie() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer
returned by the bpf_skc_to_*() helpers also.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20200925000409.3856725-1-kafai@fb.com
---
 include/uapi/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ec6dbeb17a5..69b9e30375bc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2692,7 +2692,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  * 	Description
  * 		Check whether *iph* and *th* contain a valid SYN cookie ACK for
  * 		the listening socket in *sk*.
@@ -2878,7 +2878,7 @@ union bpf_attr {
  *
  *		**-EAGAIN** if bpf program can try again.
  *
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *	Description
  *		Try to issue a SYN cookie for the packet with corresponding
  *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
-- 
cgit v1.2.3


From 27e5203bd9c5cc6d54dcac48c3027f3f04522b8b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:15 -0700
Subject: bpf: Change bpf_sk_assign to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_sk_assign() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer
returned by the bpf_skc_to_*() helpers also.

The bpf_sk_lookup_assign() is taking ARG_PTR_TO_SOCKET_"OR_NULL".  Meaning
it specifically takes a literal NULL.  ARG_PTR_TO_BTF_ID_SOCK_COMMON
does not allow a literal NULL, so another ARG type is required
for this purpose and another follow-up patch can be used if
there is such need.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200925000415.3857374-1-kafai@fb.com
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69b9e30375bc..2d6519a2ed77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3107,7 +3107,7 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
  *	Description
  *		Helper is overloaded depending on BPF program type. This
  *		description applies to **BPF_PROG_TYPE_SCHED_CLS** and
-- 
cgit v1.2.3


From e0f9956a3862b32ad73869a8e52a33c84aafa46f Mon Sep 17 00:00:00 2001
From: "Chuah, Kim Tatt" <kim.tatt.chuah@intel.com>
Date: Fri, 25 Sep 2020 17:40:41 +0800
Subject: net: stmmac: Add option for VLAN filter fail queue enable

Add option in plat_stmmacenet_data struct to enable VLAN Filter Fail
Queuing. This option allows packets that fail VLAN filter to be routed
to a specific Rx queue when Receive All is also set.

When this option is enabled:
- Enable VFFQ only when entering promiscuous mode, because Receive All
  will pass up all rx packets that failed address filtering (similar to
  promiscuous mode).
- VLAN-promiscuous mode is never entered to allow rx packet to fail VLAN
  filters and get routed to selected VFFQ Rx queue.

Reviewed-by: Voon Weifeng <weifeng.voon@intel.com>
Reviewed-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Chuah, Kim Tatt <kim.tatt.chuah@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index bd964c31d333..00e83c877496 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -198,5 +198,7 @@ struct plat_stmmacenet_data {
 	int mac_port_sel_speed;
 	bool en_tx_lpi_clockgating;
 	int has_xgmac;
+	bool vlan_fail_q_en;
+	u8 vlan_fail_q;
 };
 #endif
-- 
cgit v1.2.3


From ba5f4cfeac77fca981b199ec7f2396a3616e5216 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 24 Sep 2020 12:58:40 -0700
Subject: bpf: Add comment to document BTF type PTR_TO_BTF_ID_OR_NULL

The meaning of PTR_TO_BTF_ID_OR_NULL differs slightly from other types
denoted with the *_OR_NULL type. For example the types PTR_TO_SOCKET
and PTR_TO_SOCKET_OR_NULL can be used for branch analysis because the
type PTR_TO_SOCKET is guaranteed to _not_ have a null value.

In contrast PTR_TO_BTF_ID and BTF_TO_BTF_ID_OR_NULL have slightly
different meanings. A PTR_TO_BTF_TO_ID may be a pointer to NULL value,
but it is safe to read this pointer in the program context because
the program context will handle any faults. The fallout is for
PTR_TO_BTF_ID the verifier can assume reads are safe, but can not
use the type in branch analysis. Additionally, authors need to be
extra careful when passing PTR_TO_BTF_ID into helpers. In general
helpers consuming type PTR_TO_BTF_ID will need to assume it may
be null.

Seeing the above is not obvious to readers without the back knowledge
lets add a comment in the type definition.

Editorial comment, as networking and tracing programs get closer
and more tightly merged we may need to consider a new type that we
can ensure is non-null for branch analysis and also passing into
helpers.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/linux/bpf.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d0937f1d2980..79902325bef8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -383,8 +383,22 @@ enum bpf_reg_type {
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
-	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
-	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
+	/* PTR_TO_BTF_ID points to a kernel struct that does not need
+	 * to be null checked by the BPF program. This does not imply the
+	 * pointer is _not_ null and in practice this can easily be a null
+	 * pointer when reading pointer chains. The assumption is program
+	 * context will handle null pointer dereference typically via fault
+	 * handling. The verifier must keep this in mind and can make no
+	 * assumptions about null or non-null when doing branch analysis.
+	 * Further, when passed into helpers the helpers can not, without
+	 * additional context, assume the value is non-null.
+	 */
+	PTR_TO_BTF_ID,
+	/* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
+	 * been checked for null. Used primarily to inform the verifier
+	 * an explicit null check is required for this struct.
+	 */
+	PTR_TO_BTF_ID_OR_NULL,
 	PTR_TO_MEM,		 /* reg points to valid memory region */
 	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
 	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
-- 
cgit v1.2.3


From 22ec3d232f8511b21355fcdb6fb2a4eced3decd8 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 25 Sep 2020 13:46:05 -0700
Subject: devlink: check flash_update parameter support in net core

When implementing .flash_update, drivers which do not support
per-component update are manually checking the component parameter to
verify that it is NULL. Without this check, the driver might accept an
update request with a component specified even though it will not honor
such a request.

Instead of having each driver check this, move the logic into
net/core/devlink.c, and use a new `supported_flash_update_params` field
in the devlink_ops. Drivers which will support per-component update must
now specify this by setting DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT in
the supported_flash_update_params in their devlink_ops.

This helps ensure that drivers do not forget to check for a NULL
component if they do not support per-component update. This also enables
a slightly better error message by enabling the core stack to set the
netlink bad attribute message to indicate precisely the unsupported
attribute in the message.

Going forward, any new additional parameter to flash update will require
a bit in the supported_flash_update_params bitfield.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Bin Luo <luobin9@huawei.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Ido Schimmel <idosch@mellanox.com>
Cc: Danielle Ratson <danieller@mellanox.com>
Cc: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4883dbae7faf..cec6b4f109fa 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -550,6 +550,8 @@ enum devlink_param_generic_id {
 /* Firmware bundle identifier */
 #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID	"fw.bundle_id"
 
+#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT	BIT(0)
+
 struct devlink_region;
 struct devlink_info_req;
 
@@ -1037,6 +1039,12 @@ enum devlink_trap_group_generic_id {
 	}
 
 struct devlink_ops {
+	/**
+	 * @supported_flash_update_params:
+	 * mask of parameters supported by the driver's .flash_update
+	 * implemementation.
+	 */
+	u32 supported_flash_update_params;
 	int (*reload_down)(struct devlink *devlink, bool netns_change,
 			   struct netlink_ext_ack *extack);
 	int (*reload_up)(struct devlink *devlink,
@@ -1097,6 +1105,13 @@ struct devlink_ops {
 				      struct netlink_ext_ack *extack);
 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
 			struct netlink_ext_ack *extack);
+	/**
+	 * @flash_update: Device flash update function
+	 *
+	 * Used to perform a flash update for the device. The set of
+	 * parameters supported by the driver should be set in
+	 * supported_flash_update_params.
+	 */
 	int (*flash_update)(struct devlink *devlink, const char *file_name,
 			    const char *component,
 			    struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From bc75c054f04048517e0b153ab38d973bbcdcef59 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 25 Sep 2020 13:46:06 -0700
Subject: devlink: convert flash_update to use params structure

The devlink core recently gained support for checking whether the driver
supports a flash_update parameter, via `supported_flash_update_params`.
However, parameters are specified as function arguments. Adding a new
parameter still requires modifying the signature of the .flash_update
callback in all drivers.

Convert the .flash_update function to take a new `struct
devlink_flash_update_params` instead. By using this structure, and the
`supported_flash_update_params` bit field, a new parameter to
flash_update can be added without requiring modification to existing
drivers.

As before, all parameters except file_name will require driver opt-in.
Because file_name is a necessary field to for the flash_update to make
sense, no "SUPPORTED" bitflag is provided and it is always considered
valid. All future additional parameters will require a new bit in the
supported_flash_update_params bitfield.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Bin Luo <luobin9@huawei.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Ido Schimmel <idosch@mellanox.com>
Cc: Danielle Ratson <danieller@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index cec6b4f109fa..7794e1601772 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -550,6 +550,20 @@ enum devlink_param_generic_id {
 /* Firmware bundle identifier */
 #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID	"fw.bundle_id"
 
+/**
+ * struct devlink_flash_update_params - Flash Update parameters
+ * @file_name: the name of the flash firmware file to update from
+ * @component: the flash component to update
+ *
+ * With the exception of file_name, drivers must opt-in to parameters by
+ * setting the appropriate bit in the supported_flash_update_params field in
+ * their devlink_ops structure.
+ */
+struct devlink_flash_update_params {
+	const char *file_name;
+	const char *component;
+};
+
 #define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT	BIT(0)
 
 struct devlink_region;
@@ -1112,8 +1126,8 @@ struct devlink_ops {
 	 * parameters supported by the driver should be set in
 	 * supported_flash_update_params.
 	 */
-	int (*flash_update)(struct devlink *devlink, const char *file_name,
-			    const char *component,
+	int (*flash_update)(struct devlink *devlink,
+			    struct devlink_flash_update_params *params,
 			    struct netlink_ext_ack *extack);
 	/**
 	 * @trap_init: Trap initialization function.
-- 
cgit v1.2.3


From 5d5b4128c4caae34ddcd9b2dc30ac4d6155617a3 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 25 Sep 2020 13:46:07 -0700
Subject: devlink: introduce flash update overwrite mask

Sections of device flash may contain settings or device identifying
information. When performing a flash update, it is generally expected
that these settings and identifiers are not overwritten.

However, it may sometimes be useful to allow overwriting these fields
when performing a flash update. Some examples include, 1) customizing
the initial device config on first programming, such as overwriting
default device identifying information, or 2) reverting a device
configuration to known good state provided in the new firmware image, or
3) in case it is suspected that current firmware logic for managing the
preservation of fields during an update is broken.

Although some devices are able to completely separate these types of
settings and fields into separate components, this is not true for all
hardware.

To support controlling this behavior, a new
DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK is defined. This is an
nla_bitfield32 which will define what subset of fields in a component
should be overwritten during an update.

If no bits are specified, or of the overwrite mask is not provided, then
an update should not overwrite anything, and should maintain the
settings and identifiers as they are in the previous image.

If the overwrite mask has the DEVLINK_FLASH_OVERWRITE_SETTINGS bit set,
then the device should be configured to overwrite any of the settings in
the requested component with settings found in the provided image.

Similarly, if the DEVLINK_FLASH_OVERWRITE_IDENTIFIERS bit is set, the
device should be configured to overwrite any device identifiers in the
requested component with the identifiers from the image.

Multiple overwrite modes may be combined to indicate that a combination
of the set of fields that should be overwritten.

Drivers which support the new overwrite mask must set the
DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK in the
supported_flash_update_params field of their devlink_ops.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  4 +++-
 include/uapi/linux/devlink.h | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7794e1601772..7339bf9ba6b4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -562,9 +562,11 @@ enum devlink_param_generic_id {
 struct devlink_flash_update_params {
 	const char *file_name;
 	const char *component;
+	u32 overwrite_mask;
 };
 
-#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT	BIT(0)
+#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT		BIT(0)
+#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK	BIT(1)
 
 struct devlink_region;
 struct devlink_info_req;
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index a2ecc8b00611..7b0face1bad5 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -230,6 +230,28 @@ enum {
 	DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
 };
 
+/* Specify what sections of a flash component can be overwritten when
+ * performing an update. Overwriting of firmware binary sections is always
+ * implicitly assumed to be allowed.
+ *
+ * Each section must be documented in
+ * Documentation/networking/devlink/devlink-flash.rst
+ *
+ */
+enum {
+	DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT,
+	DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT,
+
+	__DEVLINK_FLASH_OVERWRITE_MAX_BIT,
+	DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1
+};
+
+#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT)
+#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT)
+
+#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \
+	(_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1)
+
 /**
  * enum devlink_trap_action - Packet trap action.
  * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
@@ -464,6 +486,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
 
 	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From 2d44b097bbb9d0af0f3b94304fee4b639ab14171 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:01 +0300
Subject: net: mscc: ocelot: move NPI port configuration to DSA

Remove the ocelot_configure_cpu() function, which was in fact bringing
up 2 ports: the CPU port module, which both switchdev and DSA have, and
the NPI port, which only DSA has.

The (non-Ethernet) CPU port module is at a fixed index in the analyzer,
whereas the NPI port is selected through the "ethernet" property in the
device tree.

Therefore, the function to set up an NPI port is DSA-specific, so we
move it there, simplifying the ocelot switch library a little bit.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: UNGLinuxDriver <UNGLinuxDriver@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 3105bbb6cdcf..349e839c4c18 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -672,9 +672,6 @@ void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
 int ocelot_regfields_init(struct ocelot *ocelot,
 			  const struct reg_field *const regfields);
 struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res);
-void ocelot_configure_cpu(struct ocelot *ocelot, int npi,
-			  enum ocelot_tag_prefix injection,
-			  enum ocelot_tag_prefix extraction);
 int ocelot_init(struct ocelot *ocelot);
 void ocelot_deinit(struct ocelot *ocelot);
 void ocelot_init_port(struct ocelot *ocelot, int port);
-- 
cgit v1.2.3


From c3975400c8014e39f7f272808377c97c906c8cee Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:02 +0300
Subject: net: dsa: allow drivers to request promiscuous mode on master

Currently DSA assumes that taggers don't mess with the destination MAC
address of the frames on RX. That is not always the case. Some DSA
headers are placed before the Ethernet header (ocelot), and others
simply mangle random bytes from the destination MAC address (sja1105
with its incl_srcpt option).

Currently the DSA master goes to promiscuous mode automatically when the
slave devices go too (such as when enslaved to a bridge), but in
standalone mode this is a problem that needs to be dealt with.

So give drivers the possibility to signal that their tagging protocol
will get randomly dropped otherwise, and let DSA deal with fixing that.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index d16057c5987a..46019edc32cb 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -84,6 +84,12 @@ struct dsa_device_ops {
 	unsigned int overhead;
 	const char *name;
 	enum dsa_tag_protocol proto;
+	/* Some tagging protocols either mangle or shift the destination MAC
+	 * address, in which case the DSA master would drop packets on ingress
+	 * if what it understands out of the destination MAC address is not in
+	 * its RX filter.
+	 */
+	bool promisc_on_master;
 };
 
 /* This structure defines the control interfaces that are overlayed by the
-- 
cgit v1.2.3


From 5124197ce58b5706bb60c2ecb3b79f4dfabab6e1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:04 +0300
Subject: net: dsa: tag_ocelot: use a short prefix on both ingress and egress

There are 2 goals that we follow:

- Reduce the header size
- Make the header size equal between RX and TX

The issue that required long prefix on RX was the fact that the ocelot
DSA tag, being put before Ethernet as it is, would overlap with the area
that a DSA master uses for RX filtering (destination MAC address
mainly).

Now that we can ask DSA to put the master in promiscuous mode, in theory
we could remove the prefix altogether and call it a day, but it looks
like we can't. Using no prefix on ingress, some packets (such as ICMP)
would be received, while others (such as PTP) would not be received.
This is because the DSA master we use (enetc) triggers parse errors
("MAC rx frame errors") presumably because it sees Ethernet frames with
a bad length. And indeed, when using no prefix, the EtherType (bytes
12-13 of the frame, bits 96-111) falls over the REW_VAL field from the
extraction header, aka the PTP timestamp.

When turning the short (32-bit) prefix on, the EtherType overlaps with
bits 64-79 of the extraction header, which are a reserved area
transmitted as zero by the switch. The packets are not dropped by the
DSA master with a short prefix. Actually, the frames look like this in
tcpdump (below is a PTP frame, with an extra dsa_8021q tag - dadb 0482 -
added by a downstream sja1105).

89:0c:a9:f2:01:00 > 88:80:00:0a:00:1d, 802.3, length 0: LLC, \
	dsap Unknown (0x10) Individual, ssap ProWay NM (0x0e) Response, \
	ctrl 0x0004: Information, send seq 2, rcv seq 0, \
	Flags [Response], length 78

0x0000:  8880 000a 001d 890c a9f2 0100 0000 100f  ................
0x0010:  0400 0000 0180 c200 000e 001f 7b63 0248  ............{c.H
0x0020:  dadb 0482 88f7 1202 0036 0000 0000 0000  .........6......
0x0030:  0000 0000 0000 0000 0000 001f 7bff fe63  ............{..c
0x0040:  0248 0001 1f81 0500 0000 0000 0000 0000  .H..............
0x0050:  0000 0000 0000 0000 0000 0000            ............

So the short prefix is our new default: we've shortened our RX frames by
12 octets, increased TX by 4, and headers are now equal between RX and
TX. Note that we still need promiscuous mode for the DSA master to not
drop it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 349e839c4c18..3093385f6147 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -101,6 +101,7 @@
 #define OCELOT_TAG_LEN			16
 #define OCELOT_SHORT_PREFIX_LEN		4
 #define OCELOT_LONG_PREFIX_LEN		16
+#define OCELOT_TOTAL_TAG_LEN	(OCELOT_SHORT_PREFIX_LEN + OCELOT_TAG_LEN)
 
 #define OCELOT_SPEED_2500		0
 #define OCELOT_SPEED_1000		1
-- 
cgit v1.2.3


From 2e8cb1b3db384382c84cc5f765c821454640aac1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:05 +0300
Subject: net: dsa: make the .flow_dissect tagger callback return void

There is no tagger that returns anything other than zero, so just change
the return type appropriately.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 46019edc32cb..98d339311898 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -74,8 +74,8 @@ struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
 			       struct packet_type *pt);
-	int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
-			    int *offset);
+	void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+			     int *offset);
 	/* Used to determine which traffic should match the DSA filter in
 	 * eth_type_trans, and which, if any, should bypass it and be processed
 	 * as regular on the master net device.
-- 
cgit v1.2.3


From 9790cf20a8c4bb8d774797c238fa3643f4336e46 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:06 +0300
Subject: net: dsa: add a generic procedure for the flow dissector

For all DSA formats that don't use tail tags, it looks like behind the
obscure number crunching they're all doing the same thing: locating the
real EtherType behind the DSA tag. Nonetheless, this is not immediately
obvious, so create a generic helper for those DSA taggers that put the
header before the EtherType.

Another assumption for the generic function is that the DSA tags are of
equal length on RX and on TX. Prior to the previous patch, this was not
true for ocelot and for gswip. The problem was resolved for ocelot, but
for gswip it still remains, so that can't use this helper yet.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 98d339311898..817fab5e2c21 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -711,6 +711,32 @@ static inline bool dsa_can_decode(const struct sk_buff *skb,
 	return false;
 }
 
+/* All DSA tags that push the EtherType to the right (basically all except tail
+ * tags, which don't break dissection) can be treated the same from the
+ * perspective of the flow dissector.
+ *
+ * We need to return:
+ *  - offset: the (B - A) difference between:
+ *    A. the position of the real EtherType and
+ *    B. the current skb->data (aka ETH_HLEN bytes into the frame, aka 2 bytes
+ *       after the normal EtherType was supposed to be)
+ *    The offset in bytes is exactly equal to the tagger overhead (and half of
+ *    that, in __be16 shorts).
+ *
+ *  - proto: the value of the real EtherType.
+ */
+static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb,
+						__be16 *proto, int *offset)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+	const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops;
+	int tag_len = ops->overhead;
+
+	*offset = tag_len;
+	*proto = ((__be16 *)skb->data)[(tag_len / 2) - 1];
+#endif
+}
+
 #if IS_ENABLED(CONFIG_NET_DSA)
 static inline int __dsa_netdevice_ops_check(struct net_device *dev)
 {
-- 
cgit v1.2.3


From 7a6ffe764be35af0527d8cfd047945e8f8797ddf Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 26 Sep 2020 22:32:07 +0300
Subject: net: dsa: point out the tail taggers

The Marvell 88E6060 uses tag_trailer.c and the KSZ8795, KSZ9477 and
KSZ9893 switches also use tail tags.

Tell that to the DSA core, since this makes a difference for the flow
dissector. Most switches break the parsing of frame headers, but these
ones don't, so no flow dissector adjustment needs to be done for them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 817fab5e2c21..b502a63d196e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -90,6 +90,7 @@ struct dsa_device_ops {
 	 * its RX filter.
 	 */
 	bool promisc_on_master;
+	bool tail_tag;
 };
 
 /* This structure defines the control interfaces that are overlayed by the
-- 
cgit v1.2.3


From e622129569963fa3bab976685443756e5da70da9 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Sun, 27 Sep 2020 16:01:50 +0800
Subject: ptp: add stub function for ptp_get_msgtype()

Added the missing stub function for ptp_get_msgtype().

Fixes: 036c508ba95e ("ptp: Add generic ptp message type function")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 8437307cca8c..c6487b7ab026 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -134,5 +134,13 @@ static inline struct ptp_header *ptp_parse_header(struct sk_buff *skb,
 {
 	return NULL;
 }
+static inline u8 ptp_get_msgtype(const struct ptp_header *hdr,
+				 unsigned int type)
+{
+	/* The return is meaningless. The stub function would not be
+	 * executed since no available header from ptp_parse_header.
+	 */
+	return 0;
+}
 #endif
 #endif /* _PTP_CLASSIFY_H_ */
-- 
cgit v1.2.3


From c8cb5b854b40f2ce52ccd032fa19750f4181d5fc Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Fri, 18 Sep 2020 11:33:13 +0200
Subject: nl80211/cfg80211: support 6 GHz scanning

Support 6 GHz scanning, by
 * a new scan flag to scan for colocated BSSes advertised
   by (and found) APs on 2.4 & 5 GHz
 * doing the necessary reduced neighbor report parsing for
   this, to find them
 * adding the ability to split the scan request in case the
   device by itself cannot support this.

Also add some necessary bits in mac80211 to not break with
these changes.

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200918113313.232917c93af9.Ida22f0212f9122f47094d81659e879a50434a6a2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 32 +++++++++++++++++++++++++++++++-
 include/uapi/linux/nl80211.h |  3 +++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 10c2cc8f0efc..11eb81676e95 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2095,6 +2095,27 @@ struct cfg80211_scan_info {
 	bool aborted;
 };
 
+/**
+ * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only
+ *
+ * @short_bssid: short ssid to scan for
+ * @bssid: bssid to scan for
+ * @channel_idx: idx of the channel in the channel array in the scan request
+ *	 which the above info relvant to
+ * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
+ * @short_ssid_valid: short_ssid is valid and can be used
+ * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
+ *       20 TUs before starting to send probe requests.
+ */
+struct cfg80211_scan_6ghz_params {
+	u32 short_ssid;
+	u32 channel_idx;
+	u8 bssid[ETH_ALEN];
+	bool unsolicited_probe;
+	bool short_ssid_valid;
+	bool psc_no_listen;
+};
+
 /**
  * struct cfg80211_scan_request - scan request description
  *
@@ -2122,6 +2143,10 @@ struct cfg80211_scan_info {
  * @mac_addr_mask: MAC address mask used with randomisation, bits that
  *	are 0 in the mask should be randomised, bits that are 1 should
  *	be taken from the @mac_addr
+ * @scan_6ghz: relevant for split scan request only,
+ *	true if this is the second scan request
+ * @n_6ghz_params: number of 6 GHz params
+ * @scan_6ghz_params: 6 GHz params
  * @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
  */
 struct cfg80211_scan_request {
@@ -2149,6 +2174,9 @@ struct cfg80211_scan_request {
 	struct cfg80211_scan_info info;
 	bool notified;
 	bool no_cck;
+	bool scan_6ghz;
+	u32 n_6ghz_params;
+	struct cfg80211_scan_6ghz_params *scan_6ghz_params;
 
 	/* keep last */
 	struct ieee80211_channel *channels[];
@@ -4217,6 +4245,8 @@ struct cfg80211_ops {
 /**
  * enum wiphy_flags - wiphy capability flags
  *
+ * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
+ *	 into two, first for legacy bands and second for UHB.
  * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
  *	wiphy at all
  * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -4260,7 +4290,7 @@ struct cfg80211_ops {
 enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK		= BIT(0),
 	/* use hole at 1 */
-	/* use hole at 2 */
+	WIPHY_FLAG_SPLIT_SCAN_6GHZ		= BIT(2),
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
 	WIPHY_FLAG_PS_ON_BY_DEFAULT		= BIT(4),
 	WIPHY_FLAG_4ADDR_AP			= BIT(5),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index bdc90b8dfd24..c74ceaddb909 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6059,6 +6059,8 @@ enum nl80211_timeout_reason {
  * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with
  *	%NL80211_ATTR_SCAN_FREQ_KHZ. This also means
  *	%NL80211_ATTR_SCAN_FREQUENCIES will not be included.
+ * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by
+ *	2.4/5 GHz APs
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -6075,6 +6077,7 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_RANDOM_SN				= 1<<11,
 	NL80211_SCAN_FLAG_MIN_PREQ_CONTENT			= 1<<12,
 	NL80211_SCAN_FLAG_FREQ_KHZ				= 1<<13,
+	NL80211_SCAN_FLAG_COLOCATED_6GHZ			= 1<<14,
 };
 
 /**
-- 
cgit v1.2.3


From d2b7588a47de8322891de38ec14d15105d66cb1e Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:04 -0700
Subject: nl80211: support S1G capability overrides in assoc

NL80211_ATTR_S1G_CAPABILITY can be passed along with
NL80211_ATTR_S1G_CAPABILITY_MASK to NL80211_CMD_ASSOCIATE
to indicate S1G capabilities which should override the
hardware capabilities in eg. the association request.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-4-thomas@adapt-ip.com
[johannes: always require both attributes together, commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    | 2 ++
 include/net/cfg80211.h       | 3 +++
 include/uapi/linux/nl80211.h | 9 +++++++++
 3 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 53fba39d4ba6..f71cffa18176 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2330,6 +2330,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
 }
 
 /* S1G Capabilities Information field */
+#define IEEE80211_S1G_CAPABILITY_LEN	15
+
 #define S1G_CAP0_S1G_LONG	BIT(0)
 #define S1G_CAP0_SGI_1MHZ	BIT(1)
 #define S1G_CAP0_SGI_2MHZ	BIT(2)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 11eb81676e95..bead4b9afeca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2556,6 +2556,8 @@ enum cfg80211_assoc_req_flags {
  * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association
  *	Request/Response frame or %NULL if FILS is not used. This field starts
  *	with 16 octets of STA Nonce followed by 16 octets of AP Nonce.
+ * @s1g_capa: S1G capability override
+ * @s1g_capa_mask: S1G capability override mask
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -2570,6 +2572,7 @@ struct cfg80211_assoc_request {
 	const u8 *fils_kek;
 	size_t fils_kek_len;
 	const u8 *fils_nonces;
+	struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c74ceaddb909..05db40b4c56f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2521,6 +2521,12 @@ enum nl80211_commands {
  *	unsolicited broadcast probe response. It is a nested attribute, see
  *	&enum nl80211_unsol_bcast_probe_resp_attributes.
  *
+ * @NL80211_ATTR_S1G_CAPABILITY: S1G Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION)
+ * @NL80211_ATTR_S1G_CAPABILITY_MASK: S1G Capability Information element
+ *	override mask. Used with NL80211_ATTR_S1G_CAPABILITY in
+ *	NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3007,6 +3013,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_UNSOL_BCAST_PROBE_RESP,
 
+	NL80211_ATTR_S1G_CAPABILITY,
+	NL80211_ATTR_S1G_CAPABILITY_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.2.3


From 9eaffe5078ca0808603cdd15c4eaf0106a996f3a Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:06 -0700
Subject: cfg80211: convert S1G beacon to scan results

The S1G beacon is an extension frame as opposed to
management frame for the regular beacon. This means we may
have to occasionally cast the frame buffer to a different
header type. Luckily this isn't too bad as scan results
mostly only care about the IEs.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-6-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f71cffa18176..1ce0b37441b9 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -151,6 +151,9 @@
 
 #define IEEE80211_ANO_NETTYPE_WILD              15
 
+/* bits unique to S1G beacon */
+#define IEEE80211_S1G_BCN_NEXT_TBTT    0x100
+
 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */
 #define IEEE80211_CTL_EXT_POLL		0x2000
 #define IEEE80211_CTL_EXT_SPR		0x3000
@@ -553,6 +556,28 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON);
 }
 
+/**
+ * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT &&
+ * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline bool ieee80211_next_tbtt_present(__le16 fc)
+{
+	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) ==
+	       cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) &&
+	       fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT);
+}
+
+/**
+ * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only
+ * true for S1G beacons when they're short.
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline bool ieee80211_is_s1g_short_beacon(__le16 fc)
+{
+	return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc);
+}
+
 /**
  * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM
  * @fc: frame control bytes in little-endian byteorder
@@ -1034,6 +1059,13 @@ struct ieee80211_ext {
 			u8 change_seq;
 			u8 variable[0];
 		} __packed s1g_beacon;
+		struct {
+			u8 sa[ETH_ALEN];
+			__le32 timestamp;
+			u8 change_seq;
+			u8 next_tbtt[3];
+			u8 variable[0];
+		} __packed s1g_short_beacon;
 	} u;
 } __packed __aligned(2);
 
-- 
cgit v1.2.3


From 80ca25711380c8eabe51eed875ca9432b4f8939e Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:09 -0700
Subject: cfg80211: handle Association Response from S1G STA

The sending STA type is implicit based on beacon or probe
response content. If sending STA was an S1G STA, adjust
the Information Element location accordingly.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-9-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 1ce0b37441b9..d0fda8424118 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1100,6 +1100,11 @@ struct ieee80211_mgmt {
 			/* followed by Supported rates */
 			u8 variable[0];
 		} __packed assoc_resp, reassoc_resp;
+		struct {
+			__le16 capab_info;
+			__le16 status_code;
+			u8 variable[0];
+		} __packed s1g_assoc_resp, s1g_reassoc_resp;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
-- 
cgit v1.2.3


From 05d109576a36fd498e5db2d905eb50c7dd844b83 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:10 -0700
Subject: mac80211: encode listen interval for S1G

S1G allows listen interval up to 2^14 * 10000 beacon
intervals. In order to do this listen interval needs a
scaling factor applied to the lower 14 bits. Calculate
this and properly encode the listen interval for S1G STAs.

See IEEE802.11ah-2016 Table 9-44a for reference.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-10-thomas@adapt-ip.com
[move listen_int_usf into function using it]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d0fda8424118..7b6af47dd279 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2448,6 +2448,13 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
 #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ	BIT(0)
 #define S1G_OPER_CH_WIDTH_OPER		GENMASK(4, 1)
 
+
+#define LISTEN_INT_USF	GENMASK(15, 14)
+#define LISTEN_INT_UI	GENMASK(13, 0)
+
+#define IEEE80211_MAX_USF	FIELD_MAX(LISTEN_INT_USF)
+#define IEEE80211_MAX_UI	FIELD_MAX(LISTEN_INT_UI)
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
-- 
cgit v1.2.3


From 1821f8b36f112be9e3071779da82e14384fc6989 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:12 -0700
Subject: mac80211: handle S1G low rates

S1G doesn't have legacy (sband->bitrates) rates, only MCS.
For now, just send a frame at MCS 0 if a low rate is
requested. Note we also redefine (since we're out of TX
flags) TX_RC_VHT_MCS as TX_RC_S1G_MCS to indicate an S1G
MCS. This is probably OK as VHT MCS is not valid on S1G
band and vice versa.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-12-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e90089d104b0..de22524e9270 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -833,6 +833,8 @@ enum mac80211_tx_info_flags {
 
 #define IEEE80211_TX_CTL_STBC_SHIFT		23
 
+#define IEEE80211_TX_RC_S1G_MCS IEEE80211_TX_RC_VHT_MCS
+
 /**
  * enum mac80211_tx_control_flags - flags to describe transmit control
  *
-- 
cgit v1.2.3


From 1d00ce807efaa0ee3a96de7801be042a06d35873 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:15 -0700
Subject: mac80211: support S1G association

The changes required for associating in S1G are:

- apply S1G BSS channel info before assoc
- mark all S1G STAs as QoS STAs
- include and parse AID request element
- handle new Association Response format
- don't fail assoc if supported rates element is missing

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-15-thomas@adapt-ip.com
[pass skb to ieee80211_add_aid_request_ie(), remove unused variable 'bss']
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 21 +++++++++++++++++++++
 include/net/mac80211.h    |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7b6af47dd279..f2f56b287aed 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -987,6 +987,25 @@ enum ieee80211_vht_opmode_bits {
 	IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF	= 0x80,
 };
 
+/**
+ * enum ieee80211_s1g_chanwidth
+ * These are defined in IEEE802.11-2016ah Table 10-20
+ * as BSS Channel Width
+ *
+ * @IEEE80211_S1G_CHANWIDTH_1MHZ: 1MHz operating channel
+ * @IEEE80211_S1G_CHANWIDTH_2MHZ: 2MHz operating channel
+ * @IEEE80211_S1G_CHANWIDTH_4MHZ: 4MHz operating channel
+ * @IEEE80211_S1G_CHANWIDTH_8MHZ: 8MHz operating channel
+ * @IEEE80211_S1G_CHANWIDTH_16MHZ: 16MHz operating channel
+ */
+enum ieee80211_s1g_chanwidth {
+	IEEE80211_S1G_CHANWIDTH_1MHZ = 0,
+	IEEE80211_S1G_CHANWIDTH_2MHZ = 1,
+	IEEE80211_S1G_CHANWIDTH_4MHZ = 3,
+	IEEE80211_S1G_CHANWIDTH_8MHZ = 7,
+	IEEE80211_S1G_CHANWIDTH_16MHZ = 15,
+};
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 #define WLAN_MEMBERSHIP_LEN 8
 #define WLAN_USER_POSITION_LEN 16
@@ -2854,6 +2873,8 @@ enum ieee80211_eid {
 
 	WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201,
 
+	WLAN_EID_AID_REQUEST = 210,
+	WLAN_EID_AID_RESPONSE = 211,
 	WLAN_EID_S1G_BCN_COMPAT = 213,
 	WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
 	WLAN_EID_S1G_CAPABILITIES = 217,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index de22524e9270..72bc877d2c22 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -627,6 +627,7 @@ struct ieee80211_fils_discovery {
  * @fils_discovery: FILS discovery configuration
  * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response
  *	interval.
+ * @s1g: BSS is S1G BSS (affects Association Request format).
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -696,6 +697,7 @@ struct ieee80211_bss_conf {
 	struct cfg80211_he_bss_color he_bss_color;
 	struct ieee80211_fils_discovery fils_discovery;
 	u32 unsol_bcast_probe_resp_interval;
+	bool s1g;
 };
 
 /**
-- 
cgit v1.2.3


From 58ef7c1b555e0e605da24b76cb2821dd3fcd6bc6 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:16 -0700
Subject: nl80211: include frequency offset in survey info

Recently channels gained a potential frequency offset, so
include this in the per-channel survey info.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-16-thomas@adapt-ip.com
[add the offset only if non-zero]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 05db40b4c56f..1e51445f81cd 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4097,6 +4097,7 @@ enum nl80211_user_reg_hint_type {
  *	receiving frames destined to the local BSS
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
+ * @NL80211_SURVEY_INFO_FREQUENCY_OFFSET: center frequency offset in KHz
  * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
  */
 enum nl80211_survey_info {
@@ -4112,6 +4113,7 @@ enum nl80211_survey_info {
 	NL80211_SURVEY_INFO_TIME_SCAN,
 	NL80211_SURVEY_INFO_PAD,
 	NL80211_SURVEY_INFO_TIME_BSS_RX,
+	NL80211_SURVEY_INFO_FREQUENCY_OFFSET,
 
 	/* keep last */
 	__NL80211_SURVEY_INFO_AFTER_LAST,
-- 
cgit v1.2.3


From 265a0708339daeb71848169f52b91066cc2984fd Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Tue, 22 Sep 2020 12:19:56 -0700
Subject: mac80211: Support not iterating over not-sdata-in-driver ifaces

Allow drivers to request that interface-iterator does NOT iterate
over interfaces that are not sdata-in-driver.  This will allow
us to fix crashes in ath10k (and possibly other drivers).

To summarize Johannes' explanation:

Consider

add interface wlan0
add interface wlan1
iterate active interfaces -> wlan0 wlan1
add interface wlan2
iterate active interfaces -> wlan0 wlan1 wlan2

If you apply this scenario to a restart, which ought to be functionally
equivalent to the normal startup, just compressed in time, you're
basically saying that today you get

add interface wlan0
add interface wlan1
iterate active interfaces -> wlan0 wlan1 wlan2 << problem here
add interface wlan2
iterate active interfaces -> wlan0 wlan1 wlan2

which yeah, totally seems wrong.

But fixing that to be

add interface wlan0
add interface wlan1
iterate active interfaces ->
<nothing>
add interface wlan2
iterate active interfaces -> <nothing>
(or
maybe -> wlan0 wlan1 wlan2 if the reconfig already completed)

This is also at least somewhat wrong, but better to not iterate
over something that exists in the driver than iterate over something
that does not.  Originally the first issue was causing crashes in
testing with lots of station vdevs on an ath10k radio, combined
with firmware crashing.

I ran with a similar patch for years with no obvious bad results,
including significant testing with ath9k and ath10k.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Link: https://lore.kernel.org/r/20200922191957.25257-1-greearb@candelatech.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 72bc877d2c22..4747d446179a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5407,11 +5407,15 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw);
  * @IEEE80211_IFACE_ITER_RESUME_ALL: During resume, iterate over all
  *	interfaces, even if they haven't been re-added to the driver yet.
  * @IEEE80211_IFACE_ITER_ACTIVE: Iterate only active interfaces (netdev is up).
+ * @IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER: Skip any interfaces where SDATA
+ *	is not in the driver.  This may fix crashes during firmware recovery
+ *	for instance.
  */
 enum ieee80211_interface_iteration_flags {
 	IEEE80211_IFACE_ITER_NORMAL	= 0,
 	IEEE80211_IFACE_ITER_RESUME_ALL	= BIT(0),
 	IEEE80211_IFACE_ITER_ACTIVE	= BIT(1),
+	IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER	= BIT(2),
 };
 
 /**
-- 
cgit v1.2.3


From f5bec330e3010450daeb5cb6a94a4a7c54afa306 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Mon, 28 Sep 2020 00:28:11 -0700
Subject: nl80211: extend support to config spatial reuse parameter set

Allow the user to configure below Spatial Reuse Parameter Set element.
  * Non-SRG OBSS PD Max Offset
  * SRG BSS Color Bitmap
  * SRG Partial BSSID Bitmap

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1601278091-20313-2-git-send-email-rmanohar@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  7 +++++--
 include/net/cfg80211.h       | 10 ++++++++++
 include/uapi/linux/nl80211.h | 11 +++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f2f56b287aed..770408b2fdaf 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2350,8 +2350,11 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
 }
 
 /* HE Spatial Reuse defines */
-#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			0x4
-#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		0x8
+#define IEEE80211_HE_SPR_PSR_DISALLOWED				BIT(0)
+#define IEEE80211_HE_SPR_NON_SRG_OBSS_PD_SR_DISALLOWED		BIT(1)
+#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			BIT(2)
+#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		BIT(3)
+#define IEEE80211_HE_SPR_HESIGA_SR_VAL15_ALLOWED		BIT(4)
 
 /*
  * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bead4b9afeca..aee47f2b5709 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -269,13 +269,23 @@ struct ieee80211_rate {
  * struct ieee80211_he_obss_pd - AP settings for spatial reuse
  *
  * @enable: is the feature enabled.
+ * @sr_ctrl: The SR Control field of SRP element.
+ * @non_srg_max_offset: non-SRG maximum tx power offset
  * @min_offset: minimal tx power offset an associated station shall use
  * @max_offset: maximum tx power offset an associated station shall use
+ * @bss_color_bitmap: bitmap that indicates the BSS color values used by
+ *	members of the SRG
+ * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values
+ *	used by members of the SRG
  */
 struct ieee80211_he_obss_pd {
 	bool enable;
+	u8 sr_ctrl;
+	u8 non_srg_max_offset;
 	u8 min_offset;
 	u8 max_offset;
+	u8 bss_color_bitmap[8];
+	u8 partial_bssid_bitmap[8];
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1e51445f81cd..47700a2b9af9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6991,6 +6991,13 @@ enum nl80211_peer_measurement_ftm_resp {
  *
  * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset.
  * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET: the non-SRG OBSS PD maximum
+ *	tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP: bitmap that indicates the BSS color
+ *	values used by members of the SRG.
+ * @NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP: bitmap that indicates the partial
+ *	BSSID values used by members of the SRG.
+ * @NL80211_HE_OBSS_PD_ATTR_SR_CTRL: The SR Control field of SRP element.
  *
  * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal
  * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute.
@@ -7000,6 +7007,10 @@ enum nl80211_obss_pd_attributes {
 
 	NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET,
 	NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET,
+	NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET,
+	NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP,
+	NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP,
+	NL80211_HE_OBSS_PD_ATTR_SR_CTRL,
 
 	/* keep last */
 	__NL80211_HE_OBSS_PD_ATTR_LAST,
-- 
cgit v1.2.3


From 74cc6d182d038cbba6c6d91beb1b2bab926b618b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 25 Sep 2020 17:56:40 -0700
Subject: udp_tunnel: add the ability to share port tables

Unfortunately recent Intel NIC designs share the UDP port table
across netdevs. So far the UDP tunnel port state was maintained
per netdev, we need to extend that to cater to Intel NICs.

Expect NICs to allocate the info structure dynamically and link
to the state from there. All the shared NICs will record port
offload information in the one instance of the table so we need
to make sure that the use count can accommodate larger numbers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp_tunnel.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 94bb7a882250..2ea453dac876 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags {
 	UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN	= BIT(3),
 };
 
+struct udp_tunnel_nic;
+
+#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES	(U16_MAX / 2)
+
+struct udp_tunnel_nic_shared {
+	struct udp_tunnel_nic *udp_tunnel_nic_info;
+
+	struct list_head devices;
+};
+
+struct udp_tunnel_nic_shared_node {
+	struct net_device *dev;
+	struct list_head list;
+};
+
 /**
  * struct udp_tunnel_nic_info - driver UDP tunnel offload information
  * @set_port:	callback for adding a new port
  * @unset_port:	callback for removing a port
  * @sync_table:	callback for syncing the entire port table at once
+ * @shared:	reference to device global state (optional)
  * @flags:	device flags from enum udp_tunnel_nic_info_flags
  * @tables:	UDP port tables this device has
  * @tables.n_entries:		number of entries in this table
@@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags {
  * Drivers are expected to provide either @set_port and @unset_port callbacks
  * or the @sync_table callback. Callbacks are invoked with rtnl lock held.
  *
+ * Devices which (misguidedly) share the UDP tunnel port table across multiple
+ * netdevs should allocate an instance of struct udp_tunnel_nic_shared and
+ * point @shared at it.
+ * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices
+ * sharing a table.
+ *
  * Known limitations:
  *  - UDP tunnel port notifications are fundamentally best-effort -
  *    it is likely the driver will both see skbs which use a UDP tunnel port,
@@ -234,6 +256,8 @@ struct udp_tunnel_nic_info {
 	/* all at once */
 	int (*sync_table)(struct net_device *dev, unsigned int table);
 
+	struct udp_tunnel_nic_shared *shared;
+
 	unsigned int flags;
 
 	struct udp_tunnel_nic_table_info {
-- 
cgit v1.2.3


From 1b4d60ec162f82ea29a2e7a907b5c6cc9f926321 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 25 Sep 2020 13:54:29 -0700
Subject: bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint

Add .test_run for raw_tracepoint. Also, introduce a new feature that runs
the target program on a specific CPU. This is achieved by a new flag in
bpf_attr.test, BPF_F_TEST_RUN_ON_CPU. When this flag is set, the program
is triggered on cpu with id bpf_attr.test.cpu. This feature is needed for
BPF programs that handle perf_event and other percpu resources, as the
program can access these resource locally.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200925205432.1777-2-songliubraving@fb.com
---
 include/linux/bpf.h      | 3 +++
 include/uapi/linux/bpf.h | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 79902325bef8..db6dcdee7933 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1396,6 +1396,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 				     const union bpf_attr *kattr,
 				     union bpf_attr __user *uattr);
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+			     const union bpf_attr *kattr,
+			     union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2d6519a2ed77..82522f05c021 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -424,6 +424,11 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
+
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
 	/* enabled run_time_ns and run_cnt */
@@ -566,6 +571,8 @@ union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		flags;
+		__u32		cpu;
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
-- 
cgit v1.2.3


From 201091ebb2a161a0e10aab36186690b332941f6a Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Sat, 26 Sep 2020 12:44:24 +0200
Subject: net/smc: introduce System Enterprise ID (SEID)

SMCD version 2 defines a System Enterprise ID (short SEID).
This patch contains the SEID creation and adds the callback to
retrieve the created SEID.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index 646feb4bc75f..b28b384d0625 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -37,6 +37,8 @@ struct smcd_dmb {
 #define ISM_EVENT_GID	1
 #define ISM_EVENT_SWR	2
 
+#define ISM_RESERVED_VLANID	0x1FFF
+
 #define ISM_ERROR	0xFFFF
 
 struct smcd_event {
@@ -63,6 +65,7 @@ struct smcd_ops {
 	int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
 			 bool sf, unsigned int offset, void *data,
 			 unsigned int size);
+	void (*get_system_eid)(struct smcd_dev *dev, u8 **eid);
 };
 
 struct smcd_dev {
-- 
cgit v1.2.3


From 8caaccf521c160d231587091f1f5e8aef2dd0a5e Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Sat, 26 Sep 2020 12:44:25 +0200
Subject: net/smc: introduce CHID callback for ISM devices

With SMCD version 2 the CHIDs of ISM devices are needed for the
CLC handshake.
This patch provides the new callback to retrieve the CHID of an
ISM device.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index b28b384d0625..e441aa97ad61 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -66,6 +66,7 @@ struct smcd_ops {
 			 bool sf, unsigned int offset, void *data,
 			 unsigned int size);
 	void (*get_system_eid)(struct smcd_dev *dev, u8 **eid);
+	u16 (*get_chid)(struct smcd_dev *dev);
 };
 
 struct smcd_dev {
-- 
cgit v1.2.3


From efc68158c429f37d87fd02ee9a26913c78546fc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Fri, 25 Sep 2020 23:25:01 +0200
Subject: bpf: change logging calls from verbose() to bpf_log() and use log
 pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for moving code around, change a bunch of references to
env->log (and the verbose() logging helper) to use bpf_log() and a direct
pointer to struct bpf_verifier_log. While we're touching the function
signature, mark the 'prog' argument to bpf_check_type_match() as const.

Also enhance the bpf_verifier_log_needed() check to handle NULL pointers
for the log struct so we can re-use the code with logging disabled.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          | 2 +-
 include/linux/bpf_verifier.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index db6dcdee7933..5176726f4f03 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1420,7 +1420,7 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
 			     struct bpf_reg_state *regs);
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 			  struct bpf_reg_state *reg);
-int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
+int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
 			 struct btf *btf, const struct btf_type *t);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 2bb48a2c4d08..7bc9276c4ef4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -347,8 +347,9 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
-	return (log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
-		log->level == BPF_LOG_KERNEL;
+	return log &&
+		((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
+		 log->level == BPF_LOG_KERNEL);
 }
 
 #define BPF_MAX_SUBPROGS 256
-- 
cgit v1.2.3


From f7b12b6fea00988496b7606d4964cd77beef46a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Fri, 25 Sep 2020 23:25:02 +0200
Subject: bpf: verifier: refactor check_attach_btf_id()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The check_attach_btf_id() function really does three things:

1. It performs a bunch of checks on the program to ensure that the
   attachment is valid.

2. It stores a bunch of state about the attachment being requested in
   the verifier environment and struct bpf_prog objects.

3. It allocates a trampoline for the attachment.

This patch splits out (1.) and (3.) into separate functions which will
perform the checks, but return the computed values instead of directly
modifying the environment. This is done in preparation for reusing the
checks when the actual attachment is happening, which will allow tracing
programs to have multiple (compatible) attachments.

This also fixes a bug where a bunch of checks were skipped if a trampoline
already existed for the tracing target.

Fixes: 6ba43b761c41 ("bpf: Attachment verification for BPF_MODIFY_RETURN")
Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs")
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          | 19 ++++++++++++++-----
 include/linux/bpf_verifier.h | 13 +++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5176726f4f03..b89a30764069 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -606,6 +606,13 @@ struct bpf_trampoline {
 	struct bpf_ksym ksym;
 };
 
+struct bpf_attach_target_info {
+	struct btf_func_model fmodel;
+	long tgt_addr;
+	const char *tgt_name;
+	const struct btf_type *tgt_type;
+};
+
 #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
 
 struct bpf_dispatcher_prog {
@@ -633,9 +640,10 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
 	return bpf_func(ctx, insnsi);
 }
 #ifdef CONFIG_BPF_JIT
-struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+struct bpf_trampoline *bpf_trampoline_get(u64 key,
+					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
@@ -680,10 +688,6 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
-static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
-{
-	return NULL;
-}
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
 {
 	return -ENOTSUPP;
@@ -692,6 +696,11 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
 {
 	return -ENOTSUPP;
 }
+static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
+							struct bpf_attach_target_info *tgt_info)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 #define DEFINE_BPF_DISPATCHER(name)
 #define DECLARE_BPF_DISPATCHER(name)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7bc9276c4ef4..363b4f1c562a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -450,4 +450,17 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 int check_ctx_reg(struct bpf_verifier_env *env,
 		  const struct bpf_reg_state *reg, int regno);
 
+/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
+static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
+					     u32 btf_id)
+{
+        return tgt_prog ? (((u64)tgt_prog->aux->id) << 32 | btf_id) : btf_id;
+}
+
+int bpf_check_attach_target(struct bpf_verifier_log *log,
+			    const struct bpf_prog *prog,
+			    const struct bpf_prog *tgt_prog,
+			    u32 btf_id,
+			    struct bpf_attach_target_info *tgt_info);
+
 #endif /* _LINUX_BPF_VERIFIER_H */
-- 
cgit v1.2.3


From 76654e67f3a01c50dc13dd6dea75e58943413956 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:03 +0100
Subject: bpf: Provide function to get vmlinux BTF information

It will be used later for BPF structure display support

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-2-git-send-email-alan.maguire@oracle.com
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b89a30764069..e620a4b1290f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1364,6 +1364,8 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
 	      union bpf_attr __user *uattr);
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
+struct btf *bpf_get_btf_vmlinux(void);
+
 /* Map specifics */
 struct xdp_buff;
 struct sk_buff;
-- 
cgit v1.2.3


From 31d0bc81637d8d974a6dad9827b765b4b70c89d7 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:04 +0100
Subject: bpf: Move to generic BTF show support, apply it to seq files/strings

generalize the "seq_show" seq file support in btf.c to support
a generic show callback of which we support two instances; the
current seq file show, and a show with snprintf() behaviour which
instead writes the type data to a supplied string.

Both classes of show function call btf_type_show() with different
targets; the seq file or the string to be written.  In the string
case we need to track additional data - length left in string to write
and length to return that we would have written (a la snprintf).

By default show will display type information, field members and
their types and values etc, and the information is indented
based upon structure depth. Zeroed fields are omitted.

Show however supports flags which modify its behaviour:

BTF_SHOW_COMPACT - suppress newline/indent.
BTF_SHOW_NONAME - suppress show of type and member names.
BTF_SHOW_PTR_RAW - do not obfuscate pointer values.
BTF_SHOW_UNSAFE - do not copy data to safe buffer before display.
BTF_SHOW_ZERO - show zeroed values (by default they are not shown).

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-3-git-send-email-alan.maguire@oracle.com
---
 include/linux/btf.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index a9af5e7a7ece..d0f5d3c9ec3d 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -13,6 +13,7 @@ struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
+struct btf_show;
 
 extern const struct file_operations btf_fops;
 
@@ -46,8 +47,43 @@ int btf_get_info_by_fd(const struct btf *btf,
 const struct btf_type *btf_type_id_size(const struct btf *btf,
 					u32 *type_id,
 					u32 *ret_size);
+
+/*
+ * Options to control show behaviour.
+ *	- BTF_SHOW_COMPACT: no formatting around type information
+ *	- BTF_SHOW_NONAME: no struct/union member names/types
+ *	- BTF_SHOW_PTR_RAW: show raw (unobfuscated) pointer values;
+ *	  equivalent to %px.
+ *	- BTF_SHOW_ZERO: show zero-valued struct/union members; they
+ *	  are not displayed by default
+ *	- BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
+ *	  data before displaying it.
+ */
+#define BTF_SHOW_COMPACT	(1ULL << 0)
+#define BTF_SHOW_NONAME		(1ULL << 1)
+#define BTF_SHOW_PTR_RAW	(1ULL << 2)
+#define BTF_SHOW_ZERO		(1ULL << 3)
+#define BTF_SHOW_UNSAFE		(1ULL << 4)
+
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
+
+/*
+ * Copy len bytes of string representation of obj of BTF type_id into buf.
+ *
+ * @btf: struct btf object
+ * @type_id: type id of type obj points to
+ * @obj: pointer to typed data
+ * @buf: buffer to write to
+ * @len: maximum length to write to buf
+ * @flags: show options (see above)
+ *
+ * Return: length that would have been/was copied as per snprintf, or
+ *	   negative error.
+ */
+int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
+			   char *buf, int len, u64 flags);
+
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
-- 
cgit v1.2.3


From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:05 +0100
Subject: bpf: Add bpf_snprintf_btf helper

A helper is added to support tracing kernel type information in BPF
using the BPF Type Format (BTF).  Its signature is

long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr,
		      u32 btf_ptr_size, u64 flags);

struct btf_ptr * specifies

- a pointer to the data to be traced
- the BTF id of the type of data pointed to
- a flags field is provided for future use; these flags
  are not to be confused with the BTF_F_* flags
  below that control how the btf_ptr is displayed; the
  flags member of the struct btf_ptr may be used to
  disambiguate types in kernel versus module BTF, etc;
  the main distinction is the flags relate to the type
  and information needed in identifying it; not how it
  is displayed.

For example a BPF program with a struct sk_buff *skb
could do the following:

	static struct btf_ptr b = { };

	b.ptr = skb;
	b.type_id = __builtin_btf_type_id(struct sk_buff, 1);
	bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0, 0);

Default output looks like this:

(struct sk_buff){
 .transport_header = (__u16)65535,
 .mac_header = (__u16)65535,
 .end = (sk_buff_data_t)192,
 .head = (unsigned char *)0x000000007524fd8b,
 .data = (unsigned char *)0x000000007524fd8b,
 .truesize = (unsigned int)768,
 .users = (refcount_t){
  .refs = (atomic_t){
   .counter = (int)1,
  },
 },
}

Flags modifying display are as follows:

- BTF_F_COMPACT:	no formatting around type information
- BTF_F_NONAME:		no struct/union member names/types
- BTF_F_PTR_RAW:	show raw (unobfuscated) pointer values;
			equivalent to %px.
- BTF_F_ZERO:		show zero-valued struct/union members;
			they are not displayed by default

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com
---
 include/linux/bpf.h      |  1 +
 include/linux/btf.h      |  9 ++++---
 include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e620a4b1290f..768b533ba48e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1822,6 +1822,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
+extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index d0f5d3c9ec3d..3e5cdc2ba963 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 
@@ -59,10 +60,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
  *	- BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
  *	  data before displaying it.
  */
-#define BTF_SHOW_COMPACT	(1ULL << 0)
-#define BTF_SHOW_NONAME		(1ULL << 1)
-#define BTF_SHOW_PTR_RAW	(1ULL << 2)
-#define BTF_SHOW_ZERO		(1ULL << 3)
+#define BTF_SHOW_COMPACT	BTF_F_COMPACT
+#define BTF_SHOW_NONAME		BTF_F_NONAME
+#define BTF_SHOW_PTR_RAW	BTF_F_PTR_RAW
+#define BTF_SHOW_ZERO		BTF_F_ZERO
 #define BTF_SHOW_UNSAFE		(1ULL << 4)
 
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 82522f05c021..cca9eb1b13e5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3594,6 +3594,42 @@ union bpf_attr {
  * 		the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to store a string representation of *ptr*->ptr in *str*,
+ *		using *ptr*->type_id.  This value should specify the type
+ *		that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ *		can be used to look up vmlinux BTF type ids. Traversing the
+ *		data structure using BTF, the type information and values are
+ *		stored in the first *str_size* - 1 bytes of *str*.  Safe copy of
+ *		the pointer data is carried out to avoid kernel crashes during
+ *		operation.  Smaller types can use string space on the stack;
+ *		larger programs can use map data to store the string
+ *		representation.
+ *
+ *		The string can be subsequently shared with userspace via
+ *		bpf_perf_event_output() or ring buffer interfaces.
+ *		bpf_trace_printk() is to be avoided as it places too small
+ *		a limit on string size to be useful.
+ *
+ *		*flags* is a combination of
+ *
+ *		**BTF_F_COMPACT**
+ *			no formatting around type information
+ *		**BTF_F_NONAME**
+ *			no struct/union member names/types
+ *		**BTF_F_PTR_RAW**
+ *			show raw (unobfuscated) pointer values;
+ *			equivalent to printk specifier %px.
+ *		**BTF_F_ZERO**
+ *			show zero-valued struct/union members; they
+ *			are not displayed by default
+ *
+ *	Return
+ *		The number of bytes that were written (or would have been
+ *		written if output had to be truncated due to string size),
+ *		or a negative error in cases of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3745,6 +3781,7 @@ union bpf_attr {
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
 	FN(copy_from_user),		\
+	FN(snprintf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4853,4 +4890,34 @@ struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above.  A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+	void *ptr;
+	__u32 type_id;
+	__u32 flags;		/* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ *     - BTF_F_COMPACT: no formatting around type information
+ *     - BTF_F_NONAME: no struct/union member names/types
+ *     - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ */
+enum {
+	BTF_F_COMPACT	=	(1ULL << 0),
+	BTF_F_NONAME	=	(1ULL << 1),
+	BTF_F_PTR_RAW	=	(1ULL << 2),
+	BTF_F_ZERO	=	(1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From eb411377aed9e27835e77ee0710ee8f4649958f3 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:09 +0100
Subject: bpf: Add bpf_seq_printf_btf helper

A helper is added to allow seq file writing of kernel data
structures using vmlinux BTF.  Its signature is

long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr,
                        u32 btf_ptr_size, u64 flags);

Flags and struct btf_ptr definitions/use are identical to the
bpf_snprintf_btf helper, and the helper returns 0 on success
or a negative error value.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-8-git-send-email-alan.maguire@oracle.com
---
 include/linux/btf.h      | 2 ++
 include/uapi/linux/bpf.h | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 3e5cdc2ba963..024e16ff7dcc 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -68,6 +68,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
+int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj,
+			    struct seq_file *m, u64 flags);
 
 /*
  * Copy len bytes of string representation of obj of BTF type_id into buf.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cca9eb1b13e5..96ddb00b91dc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3630,6 +3630,14 @@ union bpf_attr {
  *		The number of bytes that were written (or would have been
  *		written if output had to be truncated due to string size),
  *		or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to write to seq_write a string representation of
+ *		*ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
+ *		*flags* are identical to those used for bpf_snprintf_btf.
+ *	Return
+ *		0 on success or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3782,6 +3790,7 @@ union bpf_attr {
 	FN(d_path),			\
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
+	FN(seq_printf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From b4c5f83ae3f3e2b3239751c304e424eace62448b Mon Sep 17 00:00:00 2001
From: Rusaimi Amira Ruslan <rusaimi.amira.rusaimi@intel.com>
Date: Mon, 28 Sep 2020 18:12:12 +0800
Subject: stmmac: intel: Adding ref clock 1us tic for LPI cntr

Adding reference clock (1us tic) for all LPI timer on Intel platforms.
The reference clock is derived from ptp clk. This also enables all LPI
counter.

Signed-off-by: Rusaimi Amira Ruslan <rusaimi.amira.rusaimi@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 00e83c877496..628e28903b8b 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -200,5 +200,6 @@ struct plat_stmmacenet_data {
 	int has_xgmac;
 	bool vlan_fail_q_en;
 	u8 vlan_fail_q;
+	unsigned int eee_usecs_rate;
 };
 #endif
-- 
cgit v1.2.3


From 3aac1ead5eb6b76f3d2b8d111e6de1c2de23fb34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Tue, 29 Sep 2020 14:45:50 +0200
Subject: bpf: Move prog->aux->linked_prog and trampoline into bpf_link on
 attach
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for allowing multiple attachments of freplace programs, move
the references to the target program and trampoline into the
bpf_tracing_link structure when that is created. To do this atomically,
introduce a new mutex in prog->aux to protect writing to the two pointers
to target prog and trampoline, and rename the members to make it clear that
they are related.

With this change, it is no longer possible to attach the same tracing
program multiple times (detaching in-between), since the reference from the
tracing program to the target disappears on the first attach. However,
since the next patch will let the caller supply an attach target, that will
also make it possible to attach to the same place multiple times.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/160138355059.48470.2503076992210324984.stgit@toke.dk
---
 include/linux/bpf.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 768b533ba48e..839dd8670a7a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -640,8 +640,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
 	return bpf_func(ctx, insnsi);
 }
 #ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_prog *prog);
-int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
@@ -688,11 +688,13 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
-static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
+					   struct bpf_trampoline *tr)
 {
 	return -ENOTSUPP;
 }
-static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog,
+					     struct bpf_trampoline *tr)
 {
 	return -ENOTSUPP;
 }
@@ -763,7 +765,9 @@ struct bpf_prog_aux {
 	u32 max_rdonly_access;
 	u32 max_rdwr_access;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
-	struct bpf_prog *linked_prog;
+	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
+	struct bpf_prog *dst_prog;
+	struct bpf_trampoline *dst_trampoline;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
@@ -771,7 +775,6 @@ struct bpf_prog_aux {
 	bool sleepable;
 	bool tail_call_reachable;
 	enum bpf_tramp_prog_type trampoline_prog_type;
-	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
-- 
cgit v1.2.3


From 4a1e7c0c63e02daad751842b7880f9bbcdfb6e89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Tue, 29 Sep 2020 14:45:51 +0200
Subject: bpf: Support attaching freplace programs to multiple attach points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables support for attaching freplace programs to multiple attach
points. It does this by amending the UAPI for bpf_link_Create with a target
btf ID that can be used to supply the new attachment point along with the
target program fd. The target must be compatible with the target that was
supplied at program load time.

The implementation reuses the checks that were factored out of
check_attach_btf_id() to ensure compatibility between the BTF types of the
old and new attachment. If these match, a new bpf_tracing_link will be
created for the new attach target, allowing multiple attachments to
co-exist simultaneously.

The code could theoretically support multiple-attach of other types of
tracing programs as well, but since I don't have a use case for any of
those, there is no API support for doing so.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/160138355169.48470.17165680973640685368.stgit@toke.dk
---
 include/linux/bpf.h      | 2 ++
 include/uapi/linux/bpf.h | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 839dd8670a7a..50e5c4b52bd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -768,6 +768,8 @@ struct bpf_prog_aux {
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
 	struct bpf_prog *dst_prog;
 	struct bpf_trampoline *dst_trampoline;
+	enum bpf_prog_type saved_dst_prog_type;
+	enum bpf_attach_type saved_dst_attach_type;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 96ddb00b91dc..2b1d3f16cbd1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -639,8 +639,13 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
-		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
-		__u32		iter_info_len;	/* iter_info length */
+		union {
+			__u32		target_btf_id;	/* btf_id of target to attach to */
+			struct {
+				__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+				__u32		iter_info_len;	/* iter_info length */
+			};
+		};
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
-- 
cgit v1.2.3


From 3f47cb4c1cf3bceb2438ea962bfffc6665ee4a9f Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 29 Sep 2020 13:35:41 +0100
Subject: l2tp: report rx cookie discards in netlink get

When an L2TPv3 session receives a data frame with an incorrect cookie
l2tp_core logs a warning message and bumps a stats counter to reflect
the fact that the packet has been dropped.

However, the stats counter in question is missing from the l2tp_netlink
get message for tunnel and session instances.

Include the statistic in the netlink get response.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 88a0d32b8c07..30c80d5ba4bf 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -144,6 +144,7 @@ enum {
 	L2TP_ATTR_RX_OOS_PACKETS,	/* u64 */
 	L2TP_ATTR_RX_ERRORS,		/* u64 */
 	L2TP_ATTR_STATS_PAD,
+	L2TP_ATTR_RX_COOKIE_DISCARDS,	/* u64 */
 	__L2TP_ATTR_STATS_MAX,
 };
 
-- 
cgit v1.2.3


From 2ec13cbcfadbbeac499f3b63de0f7db490d45a7e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Tue, 29 Sep 2020 11:08:59 -0700
Subject: devlink: include <linux/const.h> for _BITUL

Commit 5d5b4128c4ca ("devlink: introduce flash update overwrite mask")
added a usage of _BITUL to the UAPI <linux/devlink.h> header, but failed
to include the header file where it was defined. It happens that this
does not break any existing kernel include chains because it gets
included through other sources. However, when including the UAPI headers
in a userspace application (such as devlink in iproute2), _BITUL is not
defined.

Fixes: 5d5b4128c4ca ("devlink: introduce flash update overwrite mask")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 7b0face1bad5..ba467dc07852 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -13,6 +13,8 @@
 #ifndef _UAPI_LINUX_DEVLINK_H_
 #define _UAPI_LINUX_DEVLINK_H_
 
+#include <linux/const.h>
+
 #define DEVLINK_GENL_NAME "devlink"
 #define DEVLINK_GENL_VERSION 0x1
 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config"
-- 
cgit v1.2.3


From f2bf88c4afc8c5ab92b40af24819933e57d0968c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 Sep 2020 22:25:11 +0200
Subject: net: caif: Remove unused caif SPI driver

While chasing in_interrupt() (ab)use in drivers it turned out that the
caif_spi driver has never been in use since the driver was merged 10 years
ago. There never was any matching code which provides a platform device.

The driver has not seen any update (asided of treewide changes and
cleanups) since 8 years and the maintainers vanished from the planet.

So analysing the potential contexts and the (in)correctness of
in_interrupt() usage is just a pointless exercise.

Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/caif_spi.h | 155 --------------------------------------------
 1 file changed, 155 deletions(-)
 delete mode 100644 include/net/caif/caif_spi.h

(limited to 'include')

diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h
deleted file mode 100644
index a0bf4cbce71b..000000000000
--- a/include/net/caif/caif_spi.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson AB 2010
- * Author:	Daniel Martensson / Daniel.Martensson@stericsson.com
- */
-
-#ifndef CAIF_SPI_H_
-#define CAIF_SPI_H_
-
-#include <net/caif/caif_device.h>
-
-#define SPI_CMD_WR			0x00
-#define SPI_CMD_RD			0x01
-#define SPI_CMD_EOT			0x02
-#define SPI_CMD_IND			0x04
-
-#define SPI_DMA_BUF_LEN			8192
-
-#define WL_SZ				2	/* 16 bits. */
-#define SPI_CMD_SZ			4	/* 32 bits. */
-#define SPI_IND_SZ			4	/* 32 bits. */
-
-#define SPI_XFER			0
-#define SPI_SS_ON			1
-#define SPI_SS_OFF			2
-#define SPI_TERMINATE			3
-
-/* Minimum time between different levels is 50 microseconds. */
-#define MIN_TRANSITION_TIME_USEC	50
-
-/* Defines for calculating duration of SPI transfers for a particular
- * number of bytes.
- */
-#define SPI_MASTER_CLK_MHZ		13
-#define SPI_XFER_TIME_USEC(bytes, clk) (((bytes) * 8) / clk)
-
-/* Normally this should be aligned on the modem in order to benefit from full
- * duplex transfers. However a size of 8188 provokes errors when running with
- * the modem. These errors occur when packet sizes approaches 4 kB of data.
- */
-#define CAIF_MAX_SPI_FRAME 4092
-
-/* Maximum number of uplink CAIF frames that can reside in the same SPI frame.
- * This number should correspond with the modem setting. The application side
- * CAIF accepts any number of embedded downlink CAIF frames.
- */
-#define CAIF_MAX_SPI_PKTS 9
-
-/* Decides if SPI buffers should be prefilled with 0xFF pattern for easier
- * debugging. Both TX and RX buffers will be filled before the transfer.
- */
-#define CFSPI_DBG_PREFILL		0
-
-/* Structure describing a SPI transfer. */
-struct cfspi_xfer {
-	u16 tx_dma_len;
-	u16 rx_dma_len;
-	void *va_tx[2];
-	dma_addr_t pa_tx[2];
-	void *va_rx;
-	dma_addr_t pa_rx;
-};
-
-/* Structure implemented by the SPI interface. */
-struct cfspi_ifc {
-	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
-	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
-	void *priv;
-};
-
-/* Structure implemented by SPI clients. */
-struct cfspi_dev {
-	int (*init_xfer) (struct cfspi_xfer *xfer, struct cfspi_dev *dev);
-	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev);
-	struct cfspi_ifc *ifc;
-	char *name;
-	u32 clk_mhz;
-	void *priv;
-};
-
-/* Enumeration describing the CAIF SPI state. */
-enum cfspi_state {
-	CFSPI_STATE_WAITING = 0,
-	CFSPI_STATE_AWAKE,
-	CFSPI_STATE_FETCH_PKT,
-	CFSPI_STATE_GET_NEXT,
-	CFSPI_STATE_INIT_XFER,
-	CFSPI_STATE_WAIT_ACTIVE,
-	CFSPI_STATE_SIG_ACTIVE,
-	CFSPI_STATE_WAIT_XFER_DONE,
-	CFSPI_STATE_XFER_DONE,
-	CFSPI_STATE_WAIT_INACTIVE,
-	CFSPI_STATE_SIG_INACTIVE,
-	CFSPI_STATE_DELIVER_PKT,
-	CFSPI_STATE_MAX,
-};
-
-/* Structure implemented by SPI physical interfaces. */
-struct cfspi {
-	struct caif_dev_common cfdev;
-	struct net_device *ndev;
-	struct platform_device *pdev;
-	struct sk_buff_head qhead;
-	struct sk_buff_head chead;
-	u16 cmd;
-	u16 tx_cpck_len;
-	u16 tx_npck_len;
-	u16 rx_cpck_len;
-	u16 rx_npck_len;
-	struct cfspi_ifc ifc;
-	struct cfspi_xfer xfer;
-	struct cfspi_dev *dev;
-	unsigned long state;
-	struct work_struct work;
-	struct workqueue_struct *wq;
-	struct list_head list;
-	int    flow_off_sent;
-	u32 qd_low_mark;
-	u32 qd_high_mark;
-	struct completion comp;
-	wait_queue_head_t wait;
-	spinlock_t lock;
-	bool flow_stop;
-	bool slave;
-	bool slave_talked;
-#ifdef CONFIG_DEBUG_FS
-	enum cfspi_state dbg_state;
-	u16 pcmd;
-	u16 tx_ppck_len;
-	u16 rx_ppck_len;
-	struct dentry *dbgfs_dir;
-	struct dentry *dbgfs_state;
-	struct dentry *dbgfs_frame;
-#endif				/* CONFIG_DEBUG_FS */
-};
-
-extern int spi_frm_align;
-extern int spi_up_head_align;
-extern int spi_up_tail_align;
-extern int spi_down_head_align;
-extern int spi_down_tail_align;
-extern struct platform_driver cfspi_spi_driver;
-
-void cfspi_dbg_state(struct cfspi *cfspi, int state);
-int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_xmitlen(struct cfspi *cfspi);
-int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_spi_remove(struct platform_device *pdev);
-int cfspi_spi_probe(struct platform_device *pdev);
-int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-int cfspi_xmitlen(struct cfspi *cfspi);
-int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
-void cfspi_xfer(struct work_struct *work);
-
-#endif				/* CAIF_SPI_H_ */
-- 
cgit v1.2.3


From c11171a413385c1a72291cdb4a7435cb189762ab Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 29 Sep 2020 22:25:12 +0200
Subject: net: Add netif_rx_any_context()

Quite some drivers make conditional decisions based on in_interrupt() to
invoke either netif_rx() or netif_rx_ni().

Conditionals based on in_interrupt() or other variants of preempt count
checks in drivers should not exist for various reasons and Linus clearly
requested to either split the code pathes or pass an argument to the
common functions which provides the context.

This is obviously the correct solution, but for some of the affected
drivers this needs a major rewrite due to their convoluted structure.

As in_interrupt() usage in drivers needs to be phased out, provide
netif_rx_any_context() as a stop gap for these drivers.

This confines the in_interrupt() conditional to core code which in turn
allows to remove the access to this check for driver code and provides one
central place to do further modifications once the driver maze is cleaned
up.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a431c3229cbf..28cfa53daf72 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3785,6 +3785,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
 int netif_rx(struct sk_buff *skb);
 int netif_rx_ni(struct sk_buff *skb);
+int netif_rx_any_context(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
 void netif_receive_skb_list(struct list_head *head);
-- 
cgit v1.2.3


From 3c0e37a9e4021ccbf855dfcbc5aff1ca10487cd4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 30 Sep 2020 01:27:21 +0300
Subject: net: mscc: ocelot: introduce a new ocelot_target_{read,write} API

There are some targets (register blocks) in the Ocelot switch that are
instantiated more than once. For example, the VCAP IS1, IS2 and ES0
blocks all share the same register layout for interacting with the cache
for the TCAM and the action RAM.

For the VCAPs, the procedure for servicing them is actually common. We
just need an API specifying which VCAP we are talking to, and we do that
via these raw ocelot_target_read and ocelot_target_write accessors.

In plain ocelot_read, the target is encoded into the register enum
itself:

	u16 target = reg >> TARGET_OFFSET;

For the VCAPs, the registers are currently defined like this:

	enum ocelot_reg {
	[...]
		S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET,
		S2_CORE_MV_CFG,
		S2_CACHE_ENTRY_DAT,
		S2_CACHE_MASK_DAT,
		S2_CACHE_ACTION_DAT,
		S2_CACHE_CNT_DAT,
		S2_CACHE_TG_DAT,
	[...]
	};

which is precisely what we want to avoid, because we'd have to duplicate
the same register map for S1 and for S0, and then figure out how to pass
VCAP instance-specific registers to the ocelot_read calls (basically
another lookup table that undoes the effect of shifting with
TARGET_OFFSET).

So for some targets, propose a more raw API, similar to what is
currently done with ocelot_port_readl and ocelot_port_writel. Those
targets can only be accessed with ocelot_target_{read,write} and not
with ocelot_{read,write} after the conversion, which is fine.

The VCAP registers are not actually modified to use this new API as of
this patch. They will be modified in the next one.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 3093385f6147..d459f4f25dc8 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -661,6 +661,24 @@ struct ocelot_policer {
 #define ocelot_fields_write(ocelot, id, reg, val) regmap_fields_write((ocelot)->regfields[(reg)], (id), (val))
 #define ocelot_fields_read(ocelot, id, reg, val) regmap_fields_read((ocelot)->regfields[(reg)], (id), (val))
 
+#define ocelot_target_read_ix(ocelot, target, reg, gi, ri) \
+	__ocelot_target_read_ix(ocelot, target, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
+#define ocelot_target_read_gix(ocelot, target, reg, gi) \
+	__ocelot_target_read_ix(ocelot, target, reg, reg##_GSZ * (gi))
+#define ocelot_target_read_rix(ocelot, target, reg, ri) \
+	__ocelot_target_read_ix(ocelot, target, reg, reg##_RSZ * (ri))
+#define ocelot_target_read(ocelot, target, reg) \
+	__ocelot_target_read_ix(ocelot, target, reg, 0)
+
+#define ocelot_target_write_ix(ocelot, target, val, reg, gi, ri) \
+	__ocelot_target_write_ix(ocelot, target, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
+#define ocelot_target_write_gix(ocelot, target, val, reg, gi) \
+	__ocelot_target_write_ix(ocelot, target, val, reg, reg##_GSZ * (gi))
+#define ocelot_target_write_rix(ocelot, target, val, reg, ri) \
+	__ocelot_target_write_ix(ocelot, target, val, reg, reg##_RSZ * (ri))
+#define ocelot_target_write(ocelot, target, val, reg) \
+	__ocelot_target_write_ix(ocelot, target, val, reg, 0)
+
 /* I/O */
 u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
 void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
@@ -668,6 +686,10 @@ u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
 void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
 void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
 		     u32 offset);
+u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
+			    u32 reg, u32 offset);
+void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
+			      u32 val, u32 reg, u32 offset);
 
 /* Hardware initialization */
 int ocelot_regfields_init(struct ocelot *ocelot,
-- 
cgit v1.2.3


From c1c3993edb7c8cfbe6b3991b4b4c9f673268770e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 30 Sep 2020 01:27:23 +0300
Subject: net: mscc: ocelot: generalize existing code for VCAP

In the Ocelot switches there are 3 TCAMs: VCAP ES0, IS1 and IS2, which
have the same configuration interface, but different sets of keys and
actions. The driver currently only supports VCAP IS2.

In preparation of VCAP IS1 and ES0 support, the existing code must be
generalized to work with any VCAP.

In that direction, we should move the structures that depend upon VCAP
instantiation, like vcap_is2_keys and vcap_is2_actions, out of struct
ocelot and into struct vcap_props .keys and .actions, a structure that
is replicated 3 times, once per VCAP. We'll pass that structure as an
argument to each function that does the key and action packing - only
the control logic needs to distinguish between ocelot->vcap[VCAP_IS2]
or IS1 or ES0.

Another change is to make use of the newly introduced ocelot_target_read
and ocelot_target_write API, since the 3 VCAPs have the same registers
but put at different addresses.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h      | 22 ++++++++-------
 include/soc/mscc/ocelot_vcap.h | 62 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index d459f4f25dc8..728b040e4e3e 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -393,13 +393,6 @@ enum ocelot_reg {
 	SYS_CM_DATA_RD,
 	SYS_CM_OP,
 	SYS_CM_DATA,
-	S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET,
-	S2_CORE_MV_CFG,
-	S2_CACHE_ENTRY_DAT,
-	S2_CACHE_MASK_DAT,
-	S2_CACHE_ACTION_DAT,
-	S2_CACHE_CNT_DAT,
-	S2_CACHE_TG_DAT,
 	PTP_PIN_CFG = PTP << TARGET_OFFSET,
 	PTP_PIN_TOD_SEC_MSB,
 	PTP_PIN_TOD_SEC_LSB,
@@ -518,6 +511,18 @@ enum ocelot_regfield {
 	REGFIELD_MAX
 };
 
+enum {
+	/* VCAP_CORE_CFG */
+	VCAP_CORE_UPDATE_CTRL,
+	VCAP_CORE_MV_CFG,
+	/* VCAP_CORE_CACHE */
+	VCAP_CACHE_ENTRY_DAT,
+	VCAP_CACHE_MASK_DAT,
+	VCAP_CACHE_ACTION_DAT,
+	VCAP_CACHE_CNT_DAT,
+	VCAP_CACHE_TG_DAT,
+};
+
 enum ocelot_ptp_pins {
 	PTP_PIN_0,
 	PTP_PIN_1,
@@ -614,9 +619,6 @@ struct ocelot {
 	struct list_head		multicast;
 
 	struct ocelot_vcap_block	block;
-
-	const struct vcap_field		*vcap_is2_keys;
-	const struct vcap_field		*vcap_is2_actions;
 	const struct vcap_props		*vcap;
 
 	/* Workqueue to check statistics for overflow with its lock */
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 5748373ab4d3..05466a1d7bd4 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -6,6 +6,8 @@
 #ifndef _OCELOT_VCAP_H_
 #define _OCELOT_VCAP_H_
 
+#include <soc/mscc/ocelot.h>
+
 /* =================================================================
  *  VCAP Common
  * =================================================================
@@ -33,6 +35,11 @@ struct vcap_props {
 	} action_table[2];
 	u16 counter_words; /* Number of counter words */
 	u16 counter_width; /* Counter width (in bits) */
+
+	enum ocelot_target		target;
+
+	const struct vcap_field		*keys;
+	const struct vcap_field		*actions;
 };
 
 /* VCAP Type-Group values */
@@ -41,6 +48,61 @@ struct vcap_props {
 #define VCAP_TG_HALF 2 /* Half entry */
 #define VCAP_TG_QUARTER 3 /* Quarter entry */
 
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_CMD(x)      (((x) << 22) & GENMASK(24, 22))
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_CMD_M       GENMASK(24, 22)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_CMD_X(x)    (((x) & GENMASK(24, 22)) >> 22)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS   BIT(21)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS  BIT(20)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_CNT_DIS     BIT(19)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_ADDR(x)     (((x) << 3) & GENMASK(18, 3))
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_ADDR_M      GENMASK(18, 3)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_ADDR_X(x)   (((x) & GENMASK(18, 3)) >> 3)
+#define VCAP_CORE_UPDATE_CTRL_UPDATE_SHOT        BIT(2)
+#define VCAP_CORE_UPDATE_CTRL_CLEAR_CACHE        BIT(1)
+#define VCAP_CORE_UPDATE_CTRL_MV_TRAFFIC_IGN     BIT(0)
+
+#define VCAP_CORE_MV_CFG_MV_NUM_POS(x)           (((x) << 16) & GENMASK(31, 16))
+#define VCAP_CORE_MV_CFG_MV_NUM_POS_M            GENMASK(31, 16)
+#define VCAP_CORE_MV_CFG_MV_NUM_POS_X(x)         (((x) & GENMASK(31, 16)) >> 16)
+#define VCAP_CORE_MV_CFG_MV_SIZE(x)              ((x) & GENMASK(15, 0))
+#define VCAP_CORE_MV_CFG_MV_SIZE_M               GENMASK(15, 0)
+
+#define VCAP_CACHE_ENTRY_DAT_RSZ                 0x4
+
+#define VCAP_CACHE_MASK_DAT_RSZ                  0x4
+
+#define VCAP_CACHE_ACTION_DAT_RSZ                0x4
+
+#define VCAP_CACHE_CNT_DAT_RSZ                   0x4
+
+#define VCAP_STICKY_VCAP_ROW_DELETED_STICKY      BIT(0)
+
+#define TCAM_BIST_CTRL_TCAM_BIST                 BIT(1)
+#define TCAM_BIST_CTRL_TCAM_INIT                 BIT(0)
+
+#define TCAM_BIST_CFG_TCAM_BIST_SOE_ENA          BIT(8)
+#define TCAM_BIST_CFG_TCAM_HCG_DIS               BIT(7)
+#define TCAM_BIST_CFG_TCAM_CG_DIS                BIT(6)
+#define TCAM_BIST_CFG_TCAM_BIAS(x)               ((x) & GENMASK(5, 0))
+#define TCAM_BIST_CFG_TCAM_BIAS_M                GENMASK(5, 0)
+
+#define TCAM_BIST_STAT_BIST_RT_ERR               BIT(15)
+#define TCAM_BIST_STAT_BIST_PENC_ERR             BIT(14)
+#define TCAM_BIST_STAT_BIST_COMP_ERR             BIT(13)
+#define TCAM_BIST_STAT_BIST_ADDR_ERR             BIT(12)
+#define TCAM_BIST_STAT_BIST_BL1E_ERR             BIT(11)
+#define TCAM_BIST_STAT_BIST_BL1_ERR              BIT(10)
+#define TCAM_BIST_STAT_BIST_BL0E_ERR             BIT(9)
+#define TCAM_BIST_STAT_BIST_BL0_ERR              BIT(8)
+#define TCAM_BIST_STAT_BIST_PH1_ERR              BIT(7)
+#define TCAM_BIST_STAT_BIST_PH0_ERR              BIT(6)
+#define TCAM_BIST_STAT_BIST_PV1_ERR              BIT(5)
+#define TCAM_BIST_STAT_BIST_PV0_ERR              BIT(4)
+#define TCAM_BIST_STAT_BIST_RUN                  BIT(3)
+#define TCAM_BIST_STAT_BIST_ERR                  BIT(2)
+#define TCAM_BIST_STAT_BIST_BUSY                 BIT(1)
+#define TCAM_BIST_STAT_TCAM_RDY                  BIT(0)
+
 /* =================================================================
  *  VCAP IS2
  * =================================================================
-- 
cgit v1.2.3


From a61e365d7c183c556717bbf36dcf00c941ec044e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 30 Sep 2020 01:27:24 +0300
Subject: net: mscc: ocelot: add definitions for VCAP IS1 keys, actions and
 target

As a preparation step for the offloading to IS1, let's create the
infrastructure for talking with this hardware block.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h      |  1 +
 include/soc/mscc/ocelot_vcap.h | 93 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 93 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 728b040e4e3e..d0073c94e22a 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -123,6 +123,7 @@ enum ocelot_target {
 	QSYS,
 	REW,
 	SYS,
+	S1,
 	S2,
 	HSIO,
 	PTP,
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 05466a1d7bd4..7ac184047292 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -14,7 +14,7 @@
  */
 
 enum {
-	/* VCAP_IS1, */
+	VCAP_IS1,
 	VCAP_IS2,
 	/* VCAP_ES0, */
 };
@@ -264,4 +264,95 @@ enum vcap_is2_action_field {
 	VCAP_IS2_ACT_HIT_CNT,
 };
 
+/* =================================================================
+ *  VCAP IS1
+ * =================================================================
+ */
+
+/* IS1 half key types */
+#define IS1_TYPE_S1_NORMAL 0
+#define IS1_TYPE_S1_5TUPLE_IP4 1
+
+/* IS1 full key types */
+#define IS1_TYPE_S1_NORMAL_IP6 0
+#define IS1_TYPE_S1_7TUPLE 1
+#define IS2_TYPE_S1_5TUPLE_IP6 2
+
+enum {
+	IS1_ACTION_TYPE_NORMAL,
+	IS1_ACTION_TYPE_MAX,
+};
+
+enum vcap_is1_half_key_field {
+	VCAP_IS1_HK_TYPE,
+	VCAP_IS1_HK_LOOKUP,
+	VCAP_IS1_HK_IGR_PORT_MASK,
+	VCAP_IS1_HK_RSV,
+	VCAP_IS1_HK_OAM_Y1731,
+	VCAP_IS1_HK_L2_MC,
+	VCAP_IS1_HK_L2_BC,
+	VCAP_IS1_HK_IP_MC,
+	VCAP_IS1_HK_VLAN_TAGGED,
+	VCAP_IS1_HK_VLAN_DBL_TAGGED,
+	VCAP_IS1_HK_TPID,
+	VCAP_IS1_HK_VID,
+	VCAP_IS1_HK_DEI,
+	VCAP_IS1_HK_PCP,
+	/* Specific Fields for IS1 Half Key S1_NORMAL */
+	VCAP_IS1_HK_L2_SMAC,
+	VCAP_IS1_HK_ETYPE_LEN,
+	VCAP_IS1_HK_ETYPE,
+	VCAP_IS1_HK_IP_SNAP,
+	VCAP_IS1_HK_IP4,
+	VCAP_IS1_HK_L3_FRAGMENT,
+	VCAP_IS1_HK_L3_FRAG_OFS_GT0,
+	VCAP_IS1_HK_L3_OPTIONS,
+	VCAP_IS1_HK_L3_DSCP,
+	VCAP_IS1_HK_L3_IP4_SIP,
+	VCAP_IS1_HK_TCP_UDP,
+	VCAP_IS1_HK_TCP,
+	VCAP_IS1_HK_L4_SPORT,
+	VCAP_IS1_HK_L4_RNG,
+	/* Specific Fields for IS1 Half Key S1_5TUPLE_IP4 */
+	VCAP_IS1_HK_IP4_INNER_TPID,
+	VCAP_IS1_HK_IP4_INNER_VID,
+	VCAP_IS1_HK_IP4_INNER_DEI,
+	VCAP_IS1_HK_IP4_INNER_PCP,
+	VCAP_IS1_HK_IP4_IP4,
+	VCAP_IS1_HK_IP4_L3_FRAGMENT,
+	VCAP_IS1_HK_IP4_L3_FRAG_OFS_GT0,
+	VCAP_IS1_HK_IP4_L3_OPTIONS,
+	VCAP_IS1_HK_IP4_L3_DSCP,
+	VCAP_IS1_HK_IP4_L3_IP4_DIP,
+	VCAP_IS1_HK_IP4_L3_IP4_SIP,
+	VCAP_IS1_HK_IP4_L3_PROTO,
+	VCAP_IS1_HK_IP4_TCP_UDP,
+	VCAP_IS1_HK_IP4_TCP,
+	VCAP_IS1_HK_IP4_L4_RNG,
+	VCAP_IS1_HK_IP4_IP_PAYLOAD_S1_5TUPLE,
+};
+
+enum vcap_is1_action_field {
+	VCAP_IS1_ACT_DSCP_ENA,
+	VCAP_IS1_ACT_DSCP_VAL,
+	VCAP_IS1_ACT_QOS_ENA,
+	VCAP_IS1_ACT_QOS_VAL,
+	VCAP_IS1_ACT_DP_ENA,
+	VCAP_IS1_ACT_DP_VAL,
+	VCAP_IS1_ACT_PAG_OVERRIDE_MASK,
+	VCAP_IS1_ACT_PAG_VAL,
+	VCAP_IS1_ACT_RSV,
+	VCAP_IS1_ACT_VID_REPLACE_ENA,
+	VCAP_IS1_ACT_VID_ADD_VAL,
+	VCAP_IS1_ACT_FID_SEL,
+	VCAP_IS1_ACT_FID_VAL,
+	VCAP_IS1_ACT_PCP_DEI_ENA,
+	VCAP_IS1_ACT_PCP_VAL,
+	VCAP_IS1_ACT_DEI_VAL,
+	VCAP_IS1_ACT_VLAN_POP_CNT_ENA,
+	VCAP_IS1_ACT_VLAN_POP_CNT,
+	VCAP_IS1_ACT_CUSTOM_ACE_TYPE_ENA,
+	VCAP_IS1_ACT_HIT_STICKY,
+};
+
 #endif /* _OCELOT_VCAP_H_ */
-- 
cgit v1.2.3


From e3aea296d86f0f2166f4ddc5e1325217f847e722 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 30 Sep 2020 01:27:25 +0300
Subject: net: mscc: ocelot: add definitions for VCAP ES0 keys, actions and
 target

As a preparation step for the offloading to ES0, let's create the
infrastructure for talking with this hardware block.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h      |  1 +
 include/soc/mscc/ocelot_vcap.h | 44 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index d0073c94e22a..b0a9efce8813 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -123,6 +123,7 @@ enum ocelot_target {
 	QSYS,
 	REW,
 	SYS,
+	S0,
 	S1,
 	S2,
 	HSIO,
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 7ac184047292..707e609ec919 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -14,9 +14,9 @@
  */
 
 enum {
+	VCAP_ES0,
 	VCAP_IS1,
 	VCAP_IS2,
-	/* VCAP_ES0, */
 };
 
 struct vcap_props {
@@ -355,4 +355,46 @@ enum vcap_is1_action_field {
 	VCAP_IS1_ACT_HIT_STICKY,
 };
 
+/* =================================================================
+ *  VCAP ES0
+ * =================================================================
+ */
+
+enum {
+	ES0_ACTION_TYPE_NORMAL,
+	ES0_ACTION_TYPE_MAX,
+};
+
+enum vcap_es0_key_field {
+	VCAP_ES0_EGR_PORT,
+	VCAP_ES0_IGR_PORT,
+	VCAP_ES0_RSV,
+	VCAP_ES0_L2_MC,
+	VCAP_ES0_L2_BC,
+	VCAP_ES0_VID,
+	VCAP_ES0_DP,
+	VCAP_ES0_PCP,
+};
+
+enum vcap_es0_action_field {
+	VCAP_ES0_ACT_PUSH_OUTER_TAG,
+	VCAP_ES0_ACT_PUSH_INNER_TAG,
+	VCAP_ES0_ACT_TAG_A_TPID_SEL,
+	VCAP_ES0_ACT_TAG_A_VID_SEL,
+	VCAP_ES0_ACT_TAG_A_PCP_SEL,
+	VCAP_ES0_ACT_TAG_A_DEI_SEL,
+	VCAP_ES0_ACT_TAG_B_TPID_SEL,
+	VCAP_ES0_ACT_TAG_B_VID_SEL,
+	VCAP_ES0_ACT_TAG_B_PCP_SEL,
+	VCAP_ES0_ACT_TAG_B_DEI_SEL,
+	VCAP_ES0_ACT_VID_A_VAL,
+	VCAP_ES0_ACT_PCP_A_VAL,
+	VCAP_ES0_ACT_DEI_A_VAL,
+	VCAP_ES0_ACT_VID_B_VAL,
+	VCAP_ES0_ACT_PCP_B_VAL,
+	VCAP_ES0_ACT_DEI_B_VAL,
+	VCAP_ES0_ACT_RSV,
+	VCAP_ES0_ACT_HIT_STICKY,
+};
+
 #endif /* _OCELOT_VCAP_H_ */
-- 
cgit v1.2.3


From 2096805497e2bd21df3c26bd48c43ff0ce954316 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 30 Sep 2020 01:27:26 +0300
Subject: net: mscc: ocelot: automatically detect VCAP constants

The numbers in struct vcap_props are not intuitive to derive, because
they are not a straightforward copy-and-paste from the reference manual
but instead rely on a fairly detailed level of understanding of the
layout of an entry in the TCAM and in the action RAM. For this reason,
bugs are very easy to introduce here.

Ease the work of hardware porters and read from hardware the constants
that were exported for this particular purpose. Note that this implies
that struct vcap_props can no longer be const.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h      | 13 ++++++++++++-
 include/soc/mscc/ocelot_vcap.h |  3 +++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index b0a9efce8813..0c40122dcb88 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -523,6 +523,17 @@ enum {
 	VCAP_CACHE_ACTION_DAT,
 	VCAP_CACHE_CNT_DAT,
 	VCAP_CACHE_TG_DAT,
+	/* VCAP_CONST */
+	VCAP_CONST_VCAP_VER,
+	VCAP_CONST_ENTRY_WIDTH,
+	VCAP_CONST_ENTRY_CNT,
+	VCAP_CONST_ENTRY_SWCNT,
+	VCAP_CONST_ENTRY_TG_WIDTH,
+	VCAP_CONST_ACTION_DEF_CNT,
+	VCAP_CONST_ACTION_WIDTH,
+	VCAP_CONST_CNT_WIDTH,
+	VCAP_CONST_CORE_CNT,
+	VCAP_CONST_IF_CNT,
 };
 
 enum ocelot_ptp_pins {
@@ -621,7 +632,7 @@ struct ocelot {
 	struct list_head		multicast;
 
 	struct ocelot_vcap_block	block;
-	const struct vcap_props		*vcap;
+	struct vcap_props		*vcap;
 
 	/* Workqueue to check statistics for overflow with its lock */
 	struct mutex			stats_lock;
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 707e609ec919..96300adf3648 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -17,8 +17,11 @@ enum {
 	VCAP_ES0,
 	VCAP_IS1,
 	VCAP_IS2,
+	__VCAP_COUNT,
 };
 
+#define OCELOT_NUM_VCAP_BLOCKS		__VCAP_COUNT
+
 struct vcap_props {
 	u16 tg_width; /* Type-group width (in bits) */
 	u16 sw_count; /* Sub word count */
-- 
cgit v1.2.3


From 002f2176532093753cb6ced61e5ea7b8904c6cae Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Mon, 28 Sep 2020 14:27:10 +0200
Subject: netfilter: nf_tables: add userdata attributes to nft_chain

Enables storing userdata for nft_chain. Field udata points to user data
and udlen stores its length.

Adds new attribute flag NFTA_CHAIN_USERDATA.

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 2 ++
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c4c526507ddb..0bd2a081ae39 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -945,6 +945,8 @@ struct nft_chain {
 					bound:1,
 					genmask:2;
 	char				*name;
+	u16				udlen;
+	u8				*udata;
 
 	/* Only used during control plane commit phase: */
 	struct nft_rule			**rules_next;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 3c2469b43742..352ee51707a1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -208,6 +208,7 @@ enum nft_chain_flags {
  * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes)
  * @NFTA_CHAIN_FLAGS: chain flags
  * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32)
+ * @NFTA_CHAIN_USERDATA: user data (NLA_BINARY)
  */
 enum nft_chain_attributes {
 	NFTA_CHAIN_UNSPEC,
@@ -222,6 +223,7 @@ enum nft_chain_attributes {
 	NFTA_CHAIN_PAD,
 	NFTA_CHAIN_FLAGS,
 	NFTA_CHAIN_ID,
+	NFTA_CHAIN_USERDATA,
 	__NFTA_CHAIN_MAX
 };
 #define NFTA_CHAIN_MAX		(__NFTA_CHAIN_MAX - 1)
-- 
cgit v1.2.3


From b426ce83baa7dff947fb354118d3133f2953aac8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2020 17:18:15 +0200
Subject: bpf: Add classid helper only based on skb->sk

Similarly to 5a52ae4e32a6 ("bpf: Allow to retrieve cgroup v1 classid
from v2 hooks"), add a helper to retrieve cgroup v1 classid solely
based on the skb->sk, so it can be used as key as part of BPF map
lookups out of tc from host ns, in particular given the skb->sk is
retained these days when crossing net ns thanks to 9c4c325252c5
("skbuff: preserve sock reference when scrubbing the skb."). This
is similar to bpf_skb_cgroup_id() which implements the same for v2.
Kubernetes ecosystem is still operating on v1 however, hence net_cls
needs to be used there until this can be dropped in with the v2
helper of bpf_skb_cgroup_id().

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/ed633cf27a1c620e901c5aa99ebdefb028dce600.1601477936.git.daniel@iogearbox.net
---
 include/uapi/linux/bpf.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2b1d3f16cbd1..6116a7f54c8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3643,6 +3643,15 @@ union bpf_attr {
  *		*flags* are identical to those used for bpf_snprintf_btf.
  *	Return
  *		0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		See **bpf_get_cgroup_classid**\ () for the main description.
+ * 		This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * 		the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * 		associated socket instead of the current process.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3796,6 +3805,7 @@ union bpf_attr {
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
+	FN(skb_cgroup_classid),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 92acdc58ab11af66fcaef485433fde61b5e32fac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2020 17:18:16 +0200
Subject: bpf, net: Rework cookie generator as per-cpu one

With its use in BPF, the cookie generator can be called very frequently
in particular when used out of cgroup v2 hooks (e.g. connect / sendmsg)
and attached to the root cgroup, for example, when used in v1/v2 mixed
environments. In particular, when there's a high churn on sockets in the
system there can be many parallel requests to the bpf_get_socket_cookie()
and bpf_get_netns_cookie() helpers which then cause contention on the
atomic counter.

As similarly done in f991bd2e1421 ("fs: introduce a per-cpu last_ino
allocator"), add a small helper library that both can use for the 64 bit
counters. Given this can be called from different contexts, we also need
to deal with potential nested calls even though in practice they are
considered extremely rare. One idea as suggested by Eric Dumazet was
to use a reverse counter for this situation since we don't expect 64 bit
overflows anyways; that way, we can avoid bigger gaps in the 64 bit
counter space compared to just batch-wise increase. Even on machines
with small number of cores (e.g. 4) the cookie generation shrinks from
min/max/med/avg (ns) of 22/50/40/38.9 down to 10/35/14/17.3 when run
in parallel from multiple CPUs.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Link: https://lore.kernel.org/bpf/8a80b8d27d3c49f9a14e1d5213c19d8be87d1dc8.1601477936.git.daniel@iogearbox.net
---
 include/linux/cookie.h      | 51 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sock_diag.h   | 14 ++++++++++++-
 include/net/net_namespace.h |  2 +-
 3 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/cookie.h

(limited to 'include')

diff --git a/include/linux/cookie.h b/include/linux/cookie.h
new file mode 100644
index 000000000000..0c159f585109
--- /dev/null
+++ b/include/linux/cookie.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COOKIE_H
+#define __LINUX_COOKIE_H
+
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+#include <asm/local.h>
+
+struct pcpu_gen_cookie {
+	local_t nesting;
+	u64 last;
+} __aligned(16);
+
+struct gen_cookie {
+	struct pcpu_gen_cookie __percpu *local;
+	atomic64_t forward_last ____cacheline_aligned_in_smp;
+	atomic64_t reverse_last;
+};
+
+#define COOKIE_LOCAL_BATCH	4096
+
+#define DEFINE_COOKIE(name)						\
+	static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name);	\
+	static struct gen_cookie name = {				\
+		.local		= &__##name,				\
+		.forward_last	= ATOMIC64_INIT(0),			\
+		.reverse_last	= ATOMIC64_INIT(0),			\
+	}
+
+static __always_inline u64 gen_cookie_next(struct gen_cookie *gc)
+{
+	struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local);
+	u64 val;
+
+	if (likely(local_inc_return(&local->nesting) == 1)) {
+		val = local->last;
+		if (__is_defined(CONFIG_SMP) &&
+		    unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
+			s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
+						       &gc->forward_last);
+			val = next - COOKIE_LOCAL_BATCH;
+		}
+		local->last = ++val;
+	} else {
+		val = atomic64_dec_return(&gc->reverse_last);
+	}
+	local_dec(&local->nesting);
+	return val;
+}
+
+#endif /* __LINUX_COOKIE_H */
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 15fe980a27ea..0b9ecd8cf979 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -25,7 +25,19 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-u64 sock_gen_cookie(struct sock *sk);
+u64 __sock_gen_cookie(struct sock *sk);
+
+static inline u64 sock_gen_cookie(struct sock *sk)
+{
+	u64 cookie;
+
+	preempt_disable();
+	cookie = __sock_gen_cookie(sk);
+	preempt_enable();
+
+	return cookie;
+}
+
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2ee5901bec7a..22bc07f4b043 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -230,7 +230,7 @@ extern struct list_head net_namespace_list;
 struct net *get_net_ns_by_pid(pid_t pid);
 struct net *get_net_ns_by_fd(int fd);
 
-u64 net_gen_cookie(struct net *net);
+u64 __net_gen_cookie(struct net *net);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
-- 
cgit v1.2.3


From b4ab31414970a7a03a5d55d75083f2c101a30592 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2020 17:18:17 +0200
Subject: bpf: Add redirect_neigh helper as redirect drop-in

Add a redirect_neigh() helper as redirect() drop-in replacement
for the xmit side. Main idea for the helper is to be very similar
in semantics to the latter just that the skb gets injected into
the neighboring subsystem in order to let the stack do the work
it knows best anyway to populate the L2 addresses of the packet
and then hand over to dev_queue_xmit() as redirect() does.

This solves two bigger items: i) skbs don't need to go up to the
stack on the host facing veth ingress side for traffic egressing
the container to achieve the same for populating L2 which also
has the huge advantage that ii) the skb->sk won't get orphaned in
ip_rcv_core() when entering the IP routing layer on the host stack.

Given that skb->sk neither gets orphaned when crossing the netns
as per 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing
the skb.") the helper can then push the skbs directly to the phys
device where FQ scheduler can do its work and TCP stack gets proper
backpressure given we hold on to skb->sk as long as skb is still
residing in queues.

With the helper used in BPF data path to then push the skb to the
phys device, I observed a stable/consistent TCP_STREAM improvement
on veth devices for traffic going container -> host -> host ->
container from ~10Gbps to ~15Gbps for a single stream in my test
environment.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net
---
 include/linux/skbuff.h   |  5 +++++
 include/uapi/linux/bpf.h | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 04a18e01b362..3d0cf3722bb4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 	return skb->mac_header != (typeof(skb->mac_header))~0U;
 }
 
+static inline void skb_unset_mac_header(struct sk_buff *skb)
+{
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+}
+
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
 	skb->mac_header = skb->data - skb->head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6116a7f54c8f..1f17c6752deb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3652,6 +3652,19 @@ union bpf_attr {
  * 		associated socket instead of the current process.
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*
+ * 		and fill in L2 addresses from neighboring subsystem. This helper
+ * 		is somewhat similar to **bpf_redirect**\ (), except that it
+ * 		fills in e.g. MAC addresses based on the L3 information from
+ * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3806,6 +3819,7 @@ union bpf_attr {
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
+	FN(redirect_neigh),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 20c168be684a97b084525906eb7ed017b7f9c0b8 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 30 Sep 2020 12:50:59 +0200
Subject: net: macb: move pdata to private header

struct macb_platform_data is only used by macb_pci to register the platform
device, move its definition to cadence/macb.h and remove platform_data/macb.h

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/platform_data/macb.h | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 include/linux/platform_data/macb.h

(limited to 'include')

diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
deleted file mode 100644
index aa5b5562d6f7..000000000000
--- a/include/linux/platform_data/macb.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004-2006 Atmel Corporation
- */
-#ifndef __MACB_PDATA_H__
-#define __MACB_PDATA_H__
-
-#include <linux/clk.h>
-
-/**
- * struct macb_platform_data - platform data for MACB Ethernet
- * @pclk:		platform clock
- * @hclk:		AHB clock
- */
-struct macb_platform_data {
-	struct clk	*pclk;
-	struct clk	*hclk;
-};
-
-#endif /* __MACB_PDATA_H__ */
-- 
cgit v1.2.3


From b6b6d6533a14b5ddcf9f9c5239fc3721fc6beda0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Sep 2020 05:54:56 -0700
Subject: inet: remove icsk_ack.blocked

TCP has been using it to work around the possibility of tcp_delack_timer()
finding the socket owned by user.

After commit 6f458dfb4092 ("tcp: improve latencies of timer triggered events")
we added TCP_DELACK_TIMER_DEFERRED atomic bit for more immediate recovery,
so we can get rid of icsk_ack.blocked

This frees space that following patch will reuse.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index dc763ca9413c..79875f976190 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -110,7 +110,7 @@ struct inet_connection_sock {
 		__u8		  pending;	 /* ACK is pending			   */
 		__u8		  quick;	 /* Scheduled number of quick acks	   */
 		__u8		  pingpong;	 /* The session is interactive		   */
-		__u8		  blocked;	 /* Delayed ACK was blocked by socket lock */
+		/* one byte hole. */
 		__u32		  ato;		 /* Predicted tick of soft clock	   */
 		unsigned long	  timeout;	 /* Currently scheduled timeout		   */
 		__u32		  lrcvtime;	 /* timestamp of last received data packet */
@@ -198,7 +198,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 #endif
 	} else if (what == ICSK_TIME_DACK) {
-		icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
+		icsk->icsk_ack.pending = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
 		sk_stop_timer(sk, &icsk->icsk_delack_timer);
 #endif
-- 
cgit v1.2.3


From a37c2134bed6f28c6d6aefa2699331e6e4e9c4f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Sep 2020 05:54:57 -0700
Subject: tcp: add exponential backoff in __tcp_send_ack()

Whenever host is under very high memory pressure,
__tcp_send_ack() skb allocation fails, and we setup
a 200 ms (TCP_DELACK_MAX) timer before retrying.

On hosts with high number of TCP sockets, we can spend
considerable amount of cpu cycles in these attempts,
add high pressure on various spinlocks in mm-layer,
ultimately blocking threads attempting to free space
from making any progress.

This patch adds standard exponential backoff to avoid
adding fuel to the fire.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 79875f976190..7338b3865a2a 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -110,7 +110,7 @@ struct inet_connection_sock {
 		__u8		  pending;	 /* ACK is pending			   */
 		__u8		  quick;	 /* Scheduled number of quick acks	   */
 		__u8		  pingpong;	 /* The session is interactive		   */
-		/* one byte hole. */
+		__u8		  retry;	 /* Number of attempts			   */
 		__u32		  ato;		 /* Predicted tick of soft clock	   */
 		unsigned long	  timeout;	 /* Currently scheduled timeout		   */
 		__u32		  lrcvtime;	 /* timestamp of last received data packet */
@@ -199,6 +199,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 #endif
 	} else if (what == ICSK_TIME_DACK) {
 		icsk->icsk_ack.pending = 0;
+		icsk->icsk_ack.retry = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
 		sk_stop_timer(sk, &icsk->icsk_delack_timer);
 #endif
-- 
cgit v1.2.3


From 5b88823bfe0875b327cc041017b5dcbec9dcbcc8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 29 Sep 2020 11:15:50 +0300
Subject: devlink: Add a tracepoint for trap reports

Add a tracepoint for trap reports so that drop monitor could register
its probe on it. Use trace_devlink_trap_report_enabled() to avoid
wasting cycles setting the trap metadata if the tracepoint is not
enabled.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h          | 14 ++++++++++++++
 include/trace/events/devlink.h | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7339bf9ba6b4..1014294ba6a0 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -624,6 +624,20 @@ struct devlink_health_reporter_ops {
 		    struct netlink_ext_ack *extack);
 };
 
+/**
+ * struct devlink_trap_metadata - Packet trap metadata.
+ * @trap_name: Trap name.
+ * @trap_group_name: Trap group name.
+ * @input_dev: Input netdevice.
+ * @fa_cookie: Flow action user cookie.
+ */
+struct devlink_trap_metadata {
+	const char *trap_name;
+	const char *trap_group_name;
+	struct net_device *input_dev;
+	const struct flow_action_cookie *fa_cookie;
+};
+
 /**
  * struct devlink_trap_policer - Immutable packet trap policer attributes.
  * @id: Policer identifier.
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 6f60a78d9a7e..44d8e2981065 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -171,6 +171,43 @@ TRACE_EVENT(devlink_health_reporter_state_update,
 		  __entry->new_state)
 );
 
+/*
+ * Tracepoint for devlink packet trap:
+ */
+TRACE_EVENT(devlink_trap_report,
+	TP_PROTO(const struct devlink *devlink, struct sk_buff *skb,
+		 const struct devlink_trap_metadata *metadata),
+
+	TP_ARGS(devlink, skb, metadata),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(trap_name, metadata->trap_name)
+		__string(trap_group_name, metadata->trap_group_name)
+		__dynamic_array(char, input_dev_name, IFNAMSIZ)
+	),
+
+	TP_fast_assign(
+		struct net_device *input_dev = metadata->input_dev;
+
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(trap_name, metadata->trap_name);
+		__assign_str(trap_group_name, metadata->trap_group_name);
+		__assign_str(input_dev_name,
+			     (input_dev ? input_dev->name : "NULL"));
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s trap_name=%s "
+		  "trap_group_name=%s input_dev_name=%s", __get_str(bus_name),
+		  __get_str(dev_name), __get_str(driver_name),
+		  __get_str(trap_name), __get_str(trap_group_name),
+		  __get_str(input_dev_name))
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 8ee2267ad33e0ba021e9dd9b437f773906cd99d6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 29 Sep 2020 11:15:52 +0300
Subject: drop_monitor: Convert to using devlink tracepoint

Convert drop monitor to use the recently introduced
'devlink_trap_report' tracepoint instead of having devlink call into
drop monitor.

This is both consistent with software originated drops ('kfree_skb'
tracepoint) and also allows drop monitor to be built as a module and
still report hardware originated drops.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/drop_monitor.h | 36 ------------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 include/net/drop_monitor.h

(limited to 'include')

diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h
deleted file mode 100644
index 3f5b6ddb3179..000000000000
--- a/include/net/drop_monitor.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef _NET_DROP_MONITOR_H_
-#define _NET_DROP_MONITOR_H_
-
-#include <linux/ktime.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/flow_offload.h>
-
-/**
- * struct net_dm_hw_metadata - Hardware-supplied packet metadata.
- * @trap_group_name: Hardware trap group name.
- * @trap_name: Hardware trap name.
- * @input_dev: Input netdevice.
- * @fa_cookie: Flow action user cookie.
- */
-struct net_dm_hw_metadata {
-	const char *trap_group_name;
-	const char *trap_name;
-	struct net_device *input_dev;
-	const struct flow_action_cookie *fa_cookie;
-};
-
-#if IS_REACHABLE(CONFIG_NET_DROP_MONITOR)
-void net_dm_hw_report(struct sk_buff *skb,
-		      const struct net_dm_hw_metadata *hw_metadata);
-#else
-static inline void
-net_dm_hw_report(struct sk_buff *skb,
-		 const struct net_dm_hw_metadata *hw_metadata)
-{
-}
-#endif
-
-#endif /* _NET_DROP_MONITOR_H_ */
-- 
cgit v1.2.3


From 93e155967ccc053b71d408edf8c0142199df5c8c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 29 Sep 2020 11:15:55 +0300
Subject: drop_monitor: Filter control packets in drop monitor

Previously, devlink called into drop monitor in order to report hardware
originated drops / exceptions. devlink intentionally filtered control
packets and did not pass them to drop monitor as they were not dropped
by the underlying hardware.

Now drop monitor registers its probe on a generic 'devlink_trap_report'
tracepoint and should therefore perform this filtering itself instead of
having devlink do that.

Add the trap type as metadata and have drop monitor ignore control
packets.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1014294ba6a0..1c286e9a3590 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -630,12 +630,14 @@ struct devlink_health_reporter_ops {
  * @trap_group_name: Trap group name.
  * @input_dev: Input netdevice.
  * @fa_cookie: Flow action user cookie.
+ * @trap_type: Trap type.
  */
 struct devlink_trap_metadata {
 	const char *trap_name;
 	const char *trap_group_name;
 	struct net_device *input_dev;
 	const struct flow_action_cookie *fa_cookie;
+	enum devlink_trap_type trap_type;
 };
 
 /**
-- 
cgit v1.2.3


From 7cd7becdddb00620fb8deb74e6fe4e5a1522ae5a Mon Sep 17 00:00:00 2001
From: sunils <sunils@nvidia.com>
Date: Fri, 11 Sep 2020 00:13:26 +0300
Subject: net/mlx5: E-switch, Use PF num in metadata reg c0

Currently only 256 vports can be supported as only 8 bits are
reserved for them and 8 bits are reserved for vhca_ids in
metadata reg c0. To support more than 256 vports, replace
vhca_id with a unique shorter 4-bit PF number which covers
upto 16 PF's. Use remaining 12 bits for vports ranging 1-4095.
This will continue to generate unique metadata even if
multiple PCI devices have same switch_id.

Signed-off-by: sunils <sunils@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/eswitch.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index c16827eeba9c..b0ae8020f13e 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -74,15 +74,16 @@ bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw);
 bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
 
 /* Reg C0 usage:
- * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) >
+ * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_CHAIN_TAG(16) >
  *
- * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num,
- * the rest (lowest 16 bits) is left for tc chain tag restoration.
- * VHCA_ID + VPORT comprise the SOURCE_PORT matching.
+ * Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of
+ * unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left
+ * for tc chain tag restoration.
+ * PFNUM + VPORT comprise the SOURCE_PORT matching.
  */
-#define ESW_VHCA_ID_BITS 8
-#define ESW_VPORT_BITS 8
-#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS)
+#define ESW_VPORT_BITS 12
+#define ESW_PFNUM_BITS 4
+#define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS)
 #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
 #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
 #define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\
-- 
cgit v1.2.3


From 792caccc4526bb489e054f9ab61d7c024b15dea2 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 30 Sep 2020 15:49:26 -0700
Subject: bpf: Introduce BPF_F_PRESERVE_ELEMS for perf event array

Currently, perf event in perf event array is removed from the array when
the map fd used to add the event is closed. This behavior makes it
difficult to the share perf events with perf event array.

Introduce perf event map that keeps the perf event open with a new flag
BPF_F_PRESERVE_ELEMS. With this flag set, perf events in the array are not
removed when the original map fd is closed. Instead, the perf event will
stay in the map until 1) it is explicitly removed from the array; or 2)
the array is freed.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200930224927.1936644-2-songliubraving@fb.com
---
 include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1f17c6752deb..4f556cfcbfbe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -414,6 +414,9 @@ enum {
 
 /* Enable memory-mapping BPF map */
 	BPF_F_MMAPABLE		= (1U << 10),
+
+/* Share perf_event among processes */
+	BPF_F_PRESERVE_ELEMS	= (1U << 11),
 };
 
 /* Flags for BPF_PROG_QUERY. */
-- 
cgit v1.2.3


From 82f45c6c4a70622cc0585e3f4372e192a6491d26 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 1 Oct 2020 18:34:48 -0700
Subject: bpf: tcp: Do not limit cb_flags when creating child sk from listen sk

The commit 0813a841566f ("bpf: tcp: Allow bpf prog to write and parse TCP header option")
unnecessarily introduced bpf_skops_init_child() which limited the child
sk from inheriting all bpf_sock_ops_cb_flags of the listen sk.  That
breaks existing user expectation.

This patch removes the bpf_skops_init_child() and just allows
sock_copy() to do its job to copy everything from listen sk to
the child sk.

Fixes: 0813a841566f ("bpf: tcp: Allow bpf prog to write and parse TCP header option")
Reported-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201002013448.2542025-1-kafai@fb.com
---
 include/net/tcp.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3601dea931a6..d4ef5bf94168 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 #endif /* CONFIG_NET_SOCK_MSG */
 
 #ifdef CONFIG_CGROUP_BPF
-/* Copy the listen sk's HDR_OPT_CB flags to its child.
- *
- * During 3-Way-HandShake, the synack is usually sent from
- * the listen sk with the HDR_OPT_CB flags set so that
- * bpf-prog will be called to write the BPF hdr option.
- *
- * In fastopen, the child sk is used to send synack instead
- * of the listen sk.  Thus, inheriting the HDR_OPT_CB flags
- * from the listen sk gives the bpf-prog a chance to write
- * BPF hdr option in the synack pkt during fastopen.
- *
- * Both fastopen and non-fastopen child will inherit the
- * HDR_OPT_CB flags to keep the bpf-prog having a consistent
- * behavior when deciding to clear this cb flags (or not)
- * during the PASSIVE_ESTABLISHED_CB.
- *
- * In the future, other cb flags could be inherited here also.
- */
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-	tcp_sk(child)->bpf_sock_ops_cb_flags =
-		tcp_sk(sk)->bpf_sock_ops_cb_flags &
-		(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
@@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 	skops->skb_data_end = skb->data + end_offset;
 }
 #else
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
-- 
cgit v1.2.3


From 949ca6b82e43b342dba153a9fd643fb1b5e9f034 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 2 Oct 2020 09:46:04 +0200
Subject: netlink: fix policy dump leak

[ Upstream commit a95bc734e60449e7b073ff7ff70c35083b290ae9 ]

If userspace doesn't complete the policy dump, we leak the
allocated state. Fix this.

Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index b2cf34f53e55..9e7eca961a98 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1938,7 +1938,8 @@ void nla_get_range_signed(const struct nla_policy *pt,
 int netlink_policy_dump_start(const struct nla_policy *policy,
 			      unsigned int maxtype,
 			      unsigned long *state);
-bool netlink_policy_dump_loop(unsigned long *state);
+bool netlink_policy_dump_loop(unsigned long state);
 int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state);
+void netlink_policy_dump_free(unsigned long state);
 
 #endif
-- 
cgit v1.2.3


From 1dc0408cdf3caf3a8b8ad97c831ae52d2ab5b953 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Oct 2020 19:42:12 -0700
Subject: net: dsa: Call dsa_untag_bridge_pvid() from dsa_switch_rcv()

When a DSA switch driver needs to call dsa_untag_bridge_pvid(), it can
set dsa_switch::untag_brige_pvid to indicate this is necessary.

This is a pre-requisite to making sure that we are always calling
dsa_untag_bridge_pvid() after eth_type_trans() has been called.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b502a63d196e..8b0696e08cac 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -308,6 +308,14 @@ struct dsa_switch {
 	 */
 	bool			configure_vlan_while_not_filtering;
 
+	/* If the switch driver always programs the CPU port as egress tagged
+	 * despite the VLAN configuration indicating otherwise, then setting
+	 * @untag_bridge_pvid will force the DSA receive path to pop the bridge's
+	 * default_pvid VLAN tagged frames to offer a consistent behavior
+	 * between a vlan_filtering=0 and vlan_filtering=1 bridge device.
+	 */
+	bool			untag_bridge_pvid;
+
 	/* In case vlan_filtering_is_global is set, the VLAN awareness state
 	 * should be retrieved from here and not from the per-port settings.
 	 */
-- 
cgit v1.2.3


From 4976b718c3551faba2c0616ef55ebeb74db1c5ca Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:44 -0700
Subject: bpf: Introduce pseudo_btf_id

Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
ksym so that further dereferences on the ksym can use the BTF info
to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
the verifier reads the btf_id stored in the insn[0]'s imm field and
marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
which is encoded in btf_vminux by pahole. If the VAR is not of a struct
type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
and the mem_size is resolved to the size of the VAR's type.

>From the VAR btf_id, the verifier can also read the address of the
ksym's corresponding kernel var from kallsyms and use that to fill
dst_reg.

Therefore, the proper functionality of pseudo_btf_id depends on (1)
kallsyms and (2) the encoding of kernel global VARs in pahole, which
should be available since pahole v1.18.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-2-haoluo@google.com
---
 include/linux/bpf_verifier.h |  7 +++++++
 include/linux/btf.h          | 15 +++++++++++++++
 include/uapi/linux/bpf.h     | 36 +++++++++++++++++++++++++++---------
 3 files changed, 49 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 363b4f1c562a..e83ef6f6bf43 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
 			u32 map_index;		/* index into used_maps[] */
 			u32 map_off;		/* offset from value base address */
 		};
+		struct {
+			enum bpf_reg_type reg_type;	/* type of pseudo_btf_id */
+			union {
+				u32 btf_id;	/* btf_id for struct typed var */
+				u32 mem_size;	/* mem_size for non-struct typed var */
+			};
+		} btf_var;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 024e16ff7dcc..af1244180588 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -145,6 +145,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
 static inline u16 btf_type_vlen(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f556cfcbfbe..2aa156af24d6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btd id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
-- 
cgit v1.2.3


From eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:47 -0700
Subject: bpf: Introduce bpf_per_cpu_ptr()

Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
except that it may return NULL. This happens when the cpu parameter is
out of range. So the caller must check the returned value.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com
---
 include/linux/bpf.h      |  4 ++++
 include/linux/btf.h      | 11 +++++++++++
 include/uapi/linux/bpf.h | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50e5c4b52bd1..9dde15b2479d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -293,6 +293,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -307,6 +308,7 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +407,7 @@ enum bpf_reg_type {
 	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1831,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index af1244180588..2bf641829664 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 	     i < btf_type_vlen(struct_type);			\
 	     i++, member++)
 
+#define for_each_vsi(i, datasec_type, member)			\
+	for (i = 0, member = btf_type_var_secinfo(datasec_type);	\
+	     i < btf_type_vlen(datasec_type);			\
+	     i++, member++)
+
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -194,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+		const struct btf_type *t)
+{
+	return (const struct btf_var_secinfo *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2aa156af24d6..f3c1b637ab39 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3686,6 +3686,23 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ *     Description
+ *             Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *             pointer to the percpu kernel variable on *cpu*. A ksym is an
+ *             extern variable decorated with '__ksym'. For ksym, there is a
+ *             global var (either static or global) defined of the same name
+ *             in the kernel. The ksym is percpu if the global var is percpu.
+ *             The returned pointer points to the global percpu var on *cpu*.
+ *
+ *             bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ *             kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ *             happens if *cpu* is larger than nr_cpu_ids. The caller of
+ *             bpf_per_cpu_ptr() must check the returned value.
+ *     Return
+ *             A pointer pointing to the kernel percpu variable on *cpu*, or
+ *             NULL, if *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3841,6 +3858,7 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),            \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:48 -0700
Subject: bpf: Introducte bpf_this_cpu_ptr()

Add bpf_this_cpu_ptr() to help access percpu var on this cpu. This
helper always returns a valid pointer, therefore no need to check
returned value for NULL. Also note that all programs run with
preemption disabled, which means that the returned pointer is stable
during all the execution of the program.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-6-haoluo@google.com
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dde15b2479d..dc63eeed4fd9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -309,6 +309,7 @@ enum bpf_return_type {
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -1832,6 +1833,7 @@ extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f3c1b637ab39..c446394135be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3703,6 +3703,18 @@ union bpf_attr {
  *     Return
  *             A pointer pointing to the kernel percpu variable on *cpu*, or
  *             NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ *	Description
+ *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *		pointer to the percpu kernel variable on this cpu. See the
+ *		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ *		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ *		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ *		never return NULL.
+ *	Return
+ *		A pointer pointing to the kernel percpu variable on this cpu.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3859,6 +3871,7 @@ union bpf_attr {
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
+	FN(bpf_this_cpu_ptr),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 319e4dd11a207bac2eaa9b96060145cd0d4c12d2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 2 Oct 2020 15:02:21 +0300
Subject: net: mscc: ocelot: introduce conversion helpers between port and
 netdev

Since the mscc_ocelot_switch_lib is common between a pure switchdev and
a DSA driver, the procedure of retrieving a net_device for a certain
port index differs, as those are registered by their individual
front-ends.

Up to now that has been dealt with by always passing the port index to
the switch library, but now, we're going to need to work with net_device
pointers from the tc-flower offload, for things like indev, or mirred.
It is not desirable to refactor that, so let's make sure that the flower
offload core has the ability to translate between a net_device and a
port index properly.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 0c40122dcb88..424256fa531b 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -559,6 +559,8 @@ enum ocelot_tag_prefix {
 struct ocelot;
 
 struct ocelot_ops {
+	struct net_device *(*port_to_netdev)(struct ocelot *ocelot, int port);
+	int (*netdev_to_port)(struct net_device *dev);
 	int (*reset)(struct ocelot *ocelot);
 	u16 (*wm_enc)(u16 value);
 };
-- 
cgit v1.2.3


From 1397a2eb52e20e20363cc0a1cb707d5eb473dbb7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 2 Oct 2020 15:02:22 +0300
Subject: net: mscc: ocelot: create TCAM skeleton from tc filter chains

For Ocelot switches, there are 2 ingress pipelines for flow offload
rules: VCAP IS1 (Ingress Classification) and IS2 (Security Enforcement).
IS1 and IS2 support different sets of actions. The pipeline order for a
packet on ingress is:

Basic classification -> VCAP IS1 -> VCAP IS2

Furthermore, IS1 is looked up 3 times, and IS2 is looked up twice (each
TCAM entry can be configured to match only on the first lookup, or only
on the second, or on both etc).

Because the TCAMs are completely independent in hardware, and because of
the fixed pipeline, we actually have very limited options when it comes
to offloading complex rules to them while still maintaining the same
semantics with the software data path.

This patch maps flow offload rules to ingress TCAMs according to a
predefined chain index number. There is going to be a script in
selftests that clarifies the usage model.

There is also an egress TCAM (VCAP ES0, the Egress Rewriter), which is
modeled on top of the default chain 0 of the egress qdisc, because it
doesn't have multiple lookups.

Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Co-developed-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 424256fa531b..46608494616f 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -633,7 +633,8 @@ struct ocelot {
 
 	struct list_head		multicast;
 
-	struct ocelot_vcap_block	block;
+	struct list_head		dummy_rules;
+	struct ocelot_vcap_block	block[3];
 	struct vcap_props		*vcap;
 
 	/* Workqueue to check statistics for overflow with its lock */
-- 
cgit v1.2.3


From 10c24eb23da0dc67934fcc9b5b0f201750ff8cd8 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 1 Oct 2020 18:11:45 +0300
Subject: devlink: add parser error drop packet traps

Add parser error drop packet traps, so that capable device driver could
register them with devlink. The new packet trap group holds any drops of
packets which were marked by the device as erroneous during header
parsing. Add documentation for every added packet trap and packet trap
group.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1c286e9a3590..1f5004a5c9f9 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -784,6 +784,22 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_SAMPLE,
 	DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_TRAP,
 	DEVLINK_TRAP_GENERIC_ID_EARLY_DROP,
+	DEVLINK_TRAP_GENERIC_ID_VXLAN_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_LLC_SNAP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_VLAN_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_PPPOE_PPP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_MPLS_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_ARP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_IP_1_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_IP_N_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_GRE_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_UDP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_TCP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_IPSEC_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_SCTP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_GTP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_ESP_PARSING,
 
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -819,6 +835,7 @@ enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS,
 
 	/* Add new generic trap group IDs above */
 	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -974,6 +991,39 @@ enum devlink_trap_group_generic_id {
 	"flow_action_trap"
 #define DEVLINK_TRAP_GENERIC_NAME_EARLY_DROP \
 	"early_drop"
+#define DEVLINK_TRAP_GENERIC_NAME_VXLAN_PARSING \
+	"vxlan_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_LLC_SNAP_PARSING \
+	"llc_snap_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_VLAN_PARSING \
+	"vlan_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_PPPOE_PPP_PARSING \
+	"pppoe_ppp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_MPLS_PARSING \
+	"mpls_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_PARSING \
+	"arp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IP_1_PARSING \
+	"ip_1_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IP_N_PARSING \
+	"ip_n_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_GRE_PARSING \
+	"gre_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_UDP_PARSING \
+	"udp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_TCP_PARSING \
+	"tcp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_IPSEC_PARSING \
+	"ipsec_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_SCTP_PARSING \
+	"sctp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_DCCP_PARSING \
+	"dccp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_GTP_PARSING \
+	"gtp_parsing"
+#define DEVLINK_TRAP_GENERIC_NAME_ESP_PARSING \
+	"esp_parsing"
+
 
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
 	"l2_drops"
@@ -1025,6 +1075,8 @@ enum devlink_trap_group_generic_id {
 	"acl_sample"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_TRAP \
 	"acl_trap"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \
+	"parser_error_drops"
 
 #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id,	      \
 			     _metadata_cap)				      \
-- 
cgit v1.2.3


From c50bf2be7306cd37e4a8228acfe0fee36b9097dc Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 1 Oct 2020 18:11:46 +0300
Subject: devlink: add .trap_group_action_set() callback

Add a new devlink callback, .trap_group_action_set(), which can be used
by device drivers which do not support controlling the action (drop,
trap) on each trap but rather on the entire group trap.
If this new callback is populated, it will take precedence over the
.trap_action_set() callback when the user requests a change of all the
traps in a group.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1f5004a5c9f9..89ede1ce3a3a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1242,6 +1242,16 @@ struct devlink_ops {
 			      const struct devlink_trap_group *group,
 			      const struct devlink_trap_policer *policer,
 			      struct netlink_ext_ack *extack);
+	/**
+	 * @trap_group_action_set: Trap group action set function.
+	 *
+	 * If this callback is populated, it will take precedence over looping
+	 * over all traps in a group and calling .trap_action_set().
+	 */
+	int (*trap_group_action_set)(struct devlink *devlink,
+				     const struct devlink_trap_group *group,
+				     enum devlink_trap_action action,
+				     struct netlink_ext_ack *extack);
 	/**
 	 * @trap_policer_init: Trap policer initialization function.
 	 *
-- 
cgit v1.2.3


From e5086736969880478abb2ac85ef8757ac6ce45bf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 2 Oct 2020 14:49:52 -0700
Subject: genetlink: reorg struct genl_family

There are holes and oversized members in struct genl_family.

Before: /* size: 104, cachelines: 2, members: 16 */
After:  /* size:  88, cachelines: 2, members: 16 */

The command field in struct genlmsghdr is a u8, so no point
in the operation count being 32 bit. Also operation 0 is
usually undefined, so we only need 255 entries.

netnsok and parallel_ops are only ever initialized to true.

We can grow the fields as needed, compiler should warn us
if someone tries to assign larger constants.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index b9eb92f3fe86..5cd9ab0c6bd9 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -48,8 +48,11 @@ struct genl_family {
 	char			name[GENL_NAMSIZ];
 	unsigned int		version;
 	unsigned int		maxattr;
-	bool			netnsok;
-	bool			parallel_ops;
+	unsigned int		mcgrp_offset;	/* private */
+	u8			netnsok:1;
+	u8			parallel_ops:1;
+	u8			n_ops;
+	u8			n_mcgrps;
 	const struct nla_policy *policy;
 	int			(*pre_doit)(const struct genl_ops *ops,
 					    struct sk_buff *skb,
@@ -59,9 +62,6 @@ struct genl_family {
 					     struct genl_info *info);
 	const struct genl_ops *	ops;
 	const struct genl_multicast_group *mcgrps;
-	unsigned int		n_ops;
-	unsigned int		n_mcgrps;
-	unsigned int		mcgrp_offset;	/* private */
 	struct module		*module;
 };
 
-- 
cgit v1.2.3


From 0b588afdd16f9e0b63128dc4bcd002e7f2725fe0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 2 Oct 2020 14:49:53 -0700
Subject: genetlink: add small version of ops

We want to add maxattr and policy back to genl_ops, to enable
dumping per command policy to user space. This, however, would
cause bloat for all the families with global policies. Introduce
smaller version of ops (half the size of genl_ops). Translate
these smaller ops into a full blown struct before use in the
core.

v1:
 - use struct assignment
 - put a full copy of the op in struct genl_dumpit_info
 - s/light/small/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 53 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 5cd9ab0c6bd9..8ea1fc1ed1c7 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -41,6 +41,8 @@ struct genl_info;
  *	(private)
  * @ops: the operations supported by this family
  * @n_ops: number of operations supported by this family
+ * @small_ops: the small-struct operations supported by this family
+ * @n_small_ops: number of small-struct operations supported by this family
  */
 struct genl_family {
 	int			id;		/* private */
@@ -52,6 +54,7 @@ struct genl_family {
 	u8			netnsok:1;
 	u8			parallel_ops:1;
 	u8			n_ops;
+	u8			n_small_ops;
 	u8			n_mcgrps;
 	const struct nla_policy *policy;
 	int			(*pre_doit)(const struct genl_ops *ops,
@@ -61,6 +64,7 @@ struct genl_family {
 					     struct sk_buff *skb,
 					     struct genl_info *info);
 	const struct genl_ops *	ops;
+	const struct genl_small_ops *small_ops;
 	const struct genl_multicast_group *mcgrps;
 	struct module		*module;
 };
@@ -108,23 +112,26 @@ enum genl_validate_flags {
 };
 
 /**
- * struct genl_info - info that is available during dumpit op call
- * @family: generic netlink family - for internal genl code usage
- * @ops: generic netlink ops - for internal genl code usage
- * @attrs: netlink attributes
+ * struct genl_small_ops - generic netlink operations (small version)
+ * @cmd: command identifier
+ * @internal_flags: flags used by the family
+ * @flags: flags
+ * @validate: validation flags from enum genl_validate_flags
+ * @doit: standard command callback
+ * @dumpit: callback for dumpers
+ *
+ * This is a cut-down version of struct genl_ops for users who don't need
+ * most of the ancillary infra and want to save space.
  */
-struct genl_dumpit_info {
-	const struct genl_family *family;
-	const struct genl_ops *ops;
-	struct nlattr **attrs;
+struct genl_small_ops {
+	int	(*doit)(struct sk_buff *skb, struct genl_info *info);
+	int	(*dumpit)(struct sk_buff *skb, struct netlink_callback *cb);
+	u8	cmd;
+	u8	internal_flags;
+	u8	flags;
+	u8	validate;
 };
 
-static inline const struct genl_dumpit_info *
-genl_dumpit_info(struct netlink_callback *cb)
-{
-	return cb->data;
-}
-
 /**
  * struct genl_ops - generic netlink operations
  * @cmd: command identifier
@@ -148,6 +155,24 @@ struct genl_ops {
 	u8			validate;
 };
 
+/**
+ * struct genl_info - info that is available during dumpit op call
+ * @family: generic netlink family - for internal genl code usage
+ * @ops: generic netlink ops - for internal genl code usage
+ * @attrs: netlink attributes
+ */
+struct genl_dumpit_info {
+	const struct genl_family *family;
+	struct genl_ops op;
+	struct nlattr **attrs;
+};
+
+static inline const struct genl_dumpit_info *
+genl_dumpit_info(struct netlink_callback *cb)
+{
+	return cb->data;
+}
+
 int genl_register_family(struct genl_family *family);
 int genl_unregister_family(const struct genl_family *family);
 void genl_notify(const struct genl_family *family, struct sk_buff *skb,
-- 
cgit v1.2.3


From adc848450ff84e961cf7966b8a475889a92a9fd3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 2 Oct 2020 14:49:55 -0700
Subject: genetlink: add a structure for dump state

Whenever netlink dump uses more than 2 cb->args[] entries
code gets hard to read. We're about to add more state to
ctrl_dumppolicy() so create a structure.

Since the structure is typed and clearly named we can remove
the local fam_id variable and use ctx->fam_id directly.

v3:
 - rebase onto explicit free fix
v1:
 - s/nl_policy_dump/netlink_policy_dump_state/
 - forward declare struct netlink_policy_dump_state,
   and move from passing unsigned long to actual pointer type
 - add build bug on
 - u16 fam_id
 - s/args/ctx/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 9e7eca961a98..00258590f2cb 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1935,11 +1935,14 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 void nla_get_range_signed(const struct nla_policy *pt,
 			  struct netlink_range_validation_signed *range);
 
+struct netlink_policy_dump_state;
+
 int netlink_policy_dump_start(const struct nla_policy *policy,
 			      unsigned int maxtype,
-			      unsigned long *state);
-bool netlink_policy_dump_loop(unsigned long state);
-int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state);
-void netlink_policy_dump_free(unsigned long state);
+			      struct netlink_policy_dump_state **state);
+bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
+int netlink_policy_dump_write(struct sk_buff *skb,
+			      struct netlink_policy_dump_state *state);
+void netlink_policy_dump_free(struct netlink_policy_dump_state *state);
 
 #endif
-- 
cgit v1.2.3


From 48526a0f4ca2b484cab4318dc0b2c2be1d8685b7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 2 Oct 2020 14:49:57 -0700
Subject: genetlink: bring back per op policy

Add policy to the struct genl_ops structure, this time
with maxattr, so it can be used properly.

Propagate .policy and .maxattr from the family
in genl_get_cmd() if needed, this way the rest of the
code does not have to worry if the policy is per op
or global.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 8ea1fc1ed1c7..cb35625d001e 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -137,6 +137,8 @@ struct genl_small_ops {
  * @cmd: command identifier
  * @internal_flags: flags used by the family
  * @flags: flags
+ * @maxattr: maximum number of attributes supported
+ * @policy: netlink policy (takes precedence over family policy)
  * @doit: standard command callback
  * @start: start callback for dumps
  * @dumpit: callback for dumpers
@@ -149,6 +151,8 @@ struct genl_ops {
 	int		       (*dumpit)(struct sk_buff *skb,
 					 struct netlink_callback *cb);
 	int		       (*done)(struct netlink_callback *cb);
+	const struct nla_policy *policy;
+	unsigned int		maxattr;
 	u8			cmd;
 	u8			internal_flags;
 	u8			flags;
-- 
cgit v1.2.3


From 04a351a62bd4be1dbcc88fae69b990362d88ffe5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 3 Oct 2020 10:44:43 +0200
Subject: netlink: rework policy dump to support multiple policies

Rework the policy dump code a bit to support adding multiple
policies to a single dump, in order to e.g. support per-op
policies in generic netlink.

v2:
 - move kernel-doc to implementation [Jakub]
 - squash the first patch to not flip-flop on the prototype
   [Jakub]
 - merge netlink_policy_dump_get_policy_idx() with the old
   get_policy_idx() we already had
 - rebase without Jakub's patch to have per-op dump

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 00258590f2cb..5a5ff97cc596 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1937,9 +1937,12 @@ void nla_get_range_signed(const struct nla_policy *pt,
 
 struct netlink_policy_dump_state;
 
-int netlink_policy_dump_start(const struct nla_policy *policy,
-			      unsigned int maxtype,
-			      struct netlink_policy_dump_state **state);
+int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
+				   const struct nla_policy *policy,
+				   unsigned int maxtype);
+int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
+				       const struct nla_policy *policy,
+				       unsigned int maxtype);
 bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
 int netlink_policy_dump_write(struct sk_buff *skb,
 			      struct netlink_policy_dump_state *state);
-- 
cgit v1.2.3


From 50a896cf2d6f34e884a00139d6e6012c9833ace3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 3 Oct 2020 10:44:45 +0200
Subject: genetlink: properly support per-op policy dumping

Add support for per-op policy dumping. The data is pretty much
as before, except that now the assumption that the policy with
index 0 is "the" policy no longer holds - you now need to look
at the new CTRL_ATTR_OP_POLICY attribute which is a nested attr
(indexed by op) containing attributes for do and dump policies.

When a single op is requested, the CTRL_ATTR_OP_POLICY will be
added in the same way, since do and dump policies may differ.

v2:
 - conditionally advertise per-command policies only if there
   actually is a policy being used for the do/dump and it's
   present at all

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/genetlink.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 9c0636ec2286..bc9c98e84828 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -64,6 +64,7 @@ enum {
 	CTRL_ATTR_OPS,
 	CTRL_ATTR_MCAST_GROUPS,
 	CTRL_ATTR_POLICY,
+	CTRL_ATTR_OP_POLICY,
 	__CTRL_ATTR_MAX,
 };
 
@@ -85,6 +86,15 @@ enum {
 	__CTRL_ATTR_MCAST_GRP_MAX,
 };
 
+enum {
+	CTRL_ATTR_POLICY_UNSPEC,
+	CTRL_ATTR_POLICY_DO,
+	CTRL_ATTR_POLICY_DUMP,
+
+	__CTRL_ATTR_POLICY_DUMP_MAX,
+	CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
 
 
-- 
cgit v1.2.3


From e992a6eda9a1eeeab73a8d2792464e4a2b1ebc3b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 3 Oct 2020 10:44:46 +0200
Subject: genetlink: allow dumping command-specific policy

Right now CTRL_CMD_GETPOLICY can only dump the family-wide
policy. Support dumping policy of a specific op.

v3:
 - rebase after per-op policy export and handle that
v2:
 - make cmd U32, just in case.
v1:
 - don't echo op in the output in a naive way, this should
   make it cleaner to extend the output format for dumping
   policies for all the commands at once in the future.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20201001225933.1373426-11-kuba@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/genetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index bc9c98e84828..d83f214b4134 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -65,6 +65,7 @@ enum {
 	CTRL_ATTR_MCAST_GROUPS,
 	CTRL_ATTR_POLICY,
 	CTRL_ATTR_OP_POLICY,
+	CTRL_ATTR_OP,
 	__CTRL_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From 8e1b3884eed7d96d4f1210b668611ee6a1803ea5 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 1 Oct 2020 17:12:50 +0000
Subject: net: remove NETDEV_HW_ADDR_T_SLAVE

NETDEV_HW_ADDR_T_SLAVE is not used anymore, remove it.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28cfa53daf72..0c79d9e56a5e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -212,9 +212,8 @@ struct netdev_hw_addr {
 	unsigned char		type;
 #define NETDEV_HW_ADDR_T_LAN		1
 #define NETDEV_HW_ADDR_T_SAN		2
-#define NETDEV_HW_ADDR_T_SLAVE		3
-#define NETDEV_HW_ADDR_T_UNICAST	4
-#define NETDEV_HW_ADDR_T_MULTICAST	5
+#define NETDEV_HW_ADDR_T_UNICAST	3
+#define NETDEV_HW_ADDR_T_MULTICAST	4
 	bool			global_use;
 	int			sync_cnt;
 	int			refcount;
-- 
cgit v1.2.3


From 19fbcb36a39eefbe8912a13ccc02e937b1c418d6 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 3 Oct 2020 00:44:28 +0200
Subject: net/sched: act_vlan: Add {POP,PUSH}_ETH actions

Implement TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH, to
respectively pop and push a base Ethernet header at the beginning of a
frame.

POP_ETH is just a matter of pulling ETH_HLEN bytes. VLAN tags, if any,
must be stripped before calling POP_ETH.

PUSH_ETH is restricted to skbs with no mac_header, and only the MAC
addresses can be configured. The Ethertype is automatically set from
skb->protocol. These restrictions ensure that all skb's fields remain
consistent, so that this action can't confuse other part of the
networking stack (like GSO).

Since openvswitch already had these actions, consolidate the code in
skbuff.c (like for vlan and mpls push/pop).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h              | 3 +++
 include/net/tc_act/tc_vlan.h        | 2 ++
 include/uapi/linux/tc_act/tc_vlan.h | 4 ++++
 3 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3d0cf3722bb4..42131e325e27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3573,6 +3573,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len);
 int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
 int skb_vlan_pop(struct sk_buff *skb);
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+int skb_eth_pop(struct sk_buff *skb);
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
+		 const unsigned char *src);
 int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
 		  int mac_len, bool ethernet);
 int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 4e2502408c31..f051046ba034 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -11,6 +11,8 @@
 
 struct tcf_vlan_params {
 	int               tcfv_action;
+	unsigned char     tcfv_push_dst[ETH_ALEN];
+	unsigned char     tcfv_push_src[ETH_ALEN];
 	u16               tcfv_push_vid;
 	__be16            tcfv_push_proto;
 	u8                tcfv_push_prio;
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 168995b54a70..5b306fe815cc 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,8 @@
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
 #define TCA_VLAN_ACT_MODIFY	3
+#define TCA_VLAN_ACT_POP_ETH	4
+#define TCA_VLAN_ACT_PUSH_ETH	5
 
 struct tc_vlan {
 	tc_gen;
@@ -30,6 +32,8 @@ enum {
 	TCA_VLAN_PUSH_VLAN_PROTOCOL,
 	TCA_VLAN_PAD,
 	TCA_VLAN_PUSH_VLAN_PRIORITY,
+	TCA_VLAN_PUSH_ETH_DST,
+	TCA_VLAN_PUSH_ETH_SRC,
 	__TCA_VLAN_MAX,
 };
 #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
-- 
cgit v1.2.3


From a45294af9e96a3e060b6272fa7cd2c4b196de335 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 3 Oct 2020 00:44:31 +0200
Subject: net/sched: act_mpls: Add action to push MPLS LSE before Ethernet
 header

Define the MAC_PUSH action which pushes an MPLS LSE before the mac
header (instead of between the mac and the network headers as the
plain PUSH action does).

The only special case is when the skb has an offloaded VLAN. In that
case, it has to be inlined before pushing the MPLS header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_mpls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/tc_act/tc_mpls.h b/include/uapi/linux/tc_act/tc_mpls.h
index 9360e95273c7..9e4e8f52a779 100644
--- a/include/uapi/linux/tc_act/tc_mpls.h
+++ b/include/uapi/linux/tc_act/tc_mpls.h
@@ -10,6 +10,7 @@
 #define TCA_MPLS_ACT_PUSH	2
 #define TCA_MPLS_ACT_MODIFY	3
 #define TCA_MPLS_ACT_DEC_TTL	4
+#define TCA_MPLS_ACT_MAC_PUSH	5
 
 struct tc_mpls {
 	tc_gen;		/* generic TC action fields. */
-- 
cgit v1.2.3


From 5f48846daf3321f8a1f8aa99cd6173e3980b7a29 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 2 Oct 2020 15:50:56 +0200
Subject: netfilter: nf_tables: Enable fast nft_cmp for inverted matches

Add a boolean indicating NFT_CMP_NEQ. To include it into the match
decision, it is sufficient to XOR it with the data comparison's result.

While being at it, store the mask that is calculated during expression
init and free the eval routine from having to recalculate it each time.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 78516de14d31..df2d91c814cb 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -25,8 +25,10 @@ void nf_tables_core_module_exit(void);
 
 struct nft_cmp_fast_expr {
 	u32			data;
+	u32			mask;
 	enum nft_registers	sreg:8;
 	u8			len;
+	bool			inv;
 };
 
 struct nft_immediate_expr {
-- 
cgit v1.2.3


From 10fdd6d80e4c21ad48f3860d723f5b3b5965477b Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 1 Oct 2020 18:57:44 +0200
Subject: netfilter: nf_tables: Implement fast bitwise expression

A typical use of bitwise expression is to mask out parts of an IP
address when matching on the network part only. Optimize for this common
use with a fast variant for NFT_BITWISE_BOOL-type expressions operating
on 32bit-sized values.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index df2d91c814cb..8657e6815b07 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -23,6 +23,13 @@ extern struct nft_object_type nft_secmark_obj_type;
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
 
+struct nft_bitwise_fast_expr {
+	u32			mask;
+	u32			xor;
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+};
+
 struct nft_cmp_fast_expr {
 	u32			data;
 	u32			mask;
@@ -68,6 +75,8 @@ struct nft_payload_set {
 
 extern const struct nft_expr_ops nft_payload_fast_ops;
 
+extern const struct nft_expr_ops nft_bitwise_fast_ops;
+
 extern struct static_key_false nft_counters_enabled;
 extern struct static_key_false nft_trace_enabled;
 
-- 
cgit v1.2.3


From cf1166349c68816f4259d32559f54972b0d5c1a4 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:51 +0200
Subject: net: devlink: Add unused port flavour

Not all ports of a switch need to be used, particularly in embedded
systems. Add a port flavour for ports which physically exist in the
switch, but are not connected to the front panel etc, and so are
unused. By having unused ports present in devlink, it gives a more
accurate representation of the hardware. It also allows regions to be
associated to such ports, so allowing, for example, to determine
unused ports are correctly powered off, or to compare probable reset
defaults of unused ports to used ports experiences issues.

Actually registering unused ports and setting the flavour to unused is
optional. The DSA core will register all such switch ports, but such
ports are expected to be limited in number. Bigger ASICs may decide
not to list unused ports.

v2:
Expand the description about why it is useful

Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ba467dc07852..5f1d6c327670 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -197,6 +197,9 @@ enum devlink_port_flavour {
 				      * port that faces the PCI VF.
 				      */
 	DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */
+	DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but
+				      * is not used in any way.
+				      */
 };
 
 enum devlink_param_cmode {
-- 
cgit v1.2.3


From 3122433eb533aac7d08302ee4b3bd3adfcd280d3 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:53 +0200
Subject: net: dsa: Register devlink ports before calling DSA driver setup()

DSA drivers want to create regions on devlink ports as well as the
devlink device instance, in order to export registers and other tables
per port. To keep all this code together in the drivers, have the
devlink ports registered early, so the setup() method can setup both
device and port devlink regions.

v3:
Remove dp->setup
Move common code out of switch statement.
Fix wrong goto

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8b0696e08cac..049140b2f593 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -215,6 +215,7 @@ struct dsa_port {
 	u8			stp_state;
 	struct net_device	*bridge_dev;
 	struct devlink_port	devlink_port;
+	bool			devlink_port_setup;
 	struct phylink		*pl;
 	struct phylink_config	pl_config;
 
-- 
cgit v1.2.3


From 544e7c33ec2f8077685c254f5e3b03a85c0e62eb Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:54 +0200
Subject: net: devlink: Add support for port regions

Allow regions to be registered to a devlink port. The same netlink API
is used, but the port index is provided to indicate when a region is a
port region as opposed to a device region.

Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 89ede1ce3a3a..237ba5e29a3b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -110,6 +110,7 @@ struct devlink_port_attrs {
 struct devlink_port {
 	struct list_head list;
 	struct list_head param_list;
+	struct list_head region_list;
 	struct devlink *devlink;
 	unsigned int index;
 	bool registered;
@@ -591,6 +592,26 @@ struct devlink_region_ops {
 	void *priv;
 };
 
+/**
+ * struct devlink_port_region_ops - Region operations for a port
+ * @name: region name
+ * @destructor: callback used to free snapshot memory when deleting
+ * @snapshot: callback to request an immediate snapshot. On success,
+ *            the data variable must be updated to point to the snapshot data.
+ *            The function will be called while the devlink instance lock is
+ *            held.
+ * @priv: Pointer to driver private data for the region operation
+ */
+struct devlink_port_region_ops {
+	const char *name;
+	void (*destructor)(const void *data);
+	int (*snapshot)(struct devlink_port *port,
+			const struct devlink_port_region_ops *ops,
+			struct netlink_ext_ack *extack,
+			u8 **data);
+	void *priv;
+};
+
 struct devlink_fmsg;
 struct devlink_health_reporter;
 
@@ -1445,7 +1466,13 @@ struct devlink_region *
 devlink_region_create(struct devlink *devlink,
 		      const struct devlink_region_ops *ops,
 		      u32 region_max_snapshots, u64 region_size);
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+			   const struct devlink_port_region_ops *ops,
+			   u32 region_max_snapshots, u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
+void devlink_port_region_destroy(struct devlink_region *region);
+
 int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id);
 void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id);
 int devlink_region_snapshot_create(struct devlink_region *region,
-- 
cgit v1.2.3


From 08156ba430b412bd9c23fe6155a58c7cb166045c Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:55 +0200
Subject: net: dsa: Add devlink port regions support to DSA

Allow DSA drivers to make use of devlink port regions, via simple
wrappers.

Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 049140b2f593..ca426cf9927b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -681,6 +681,11 @@ struct devlink_region *
 dsa_devlink_region_create(struct dsa_switch *ds,
 			  const struct devlink_region_ops *ops,
 			  u32 region_max_snapshots, u64 region_size);
+struct devlink_region *
+dsa_devlink_port_region_create(struct dsa_switch *ds,
+			       int port,
+			       const struct devlink_port_region_ops *ops,
+			       u32 region_max_snapshots, u64 region_size);
 void dsa_devlink_region_destroy(struct devlink_region *region);
 
 struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
-- 
cgit v1.2.3


From 7d1e2a10681d3b6eeaace68885ef5de88ce03efe Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:56 +0200
Subject: net: dsa: Add helper for converting devlink port to ds and port

Hide away from DSA drivers how devlink works.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index ca426cf9927b..c0185660881c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -701,6 +701,20 @@ static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl)
 	return dl_priv->ds;
 }
 
+static inline
+struct dsa_switch *dsa_devlink_port_to_ds(struct devlink_port *port)
+{
+	struct devlink *dl = port->devlink;
+	struct dsa_devlink_priv *dl_priv = devlink_priv(dl);
+
+	return dl_priv->ds;
+}
+
+static inline int dsa_devlink_port_to_port(struct devlink_port *port)
+{
+	return port->index;
+}
+
 struct dsa_switch_driver {
 	struct list_head	list;
 	const struct dsa_switch_ops *ops;
-- 
cgit v1.2.3


From 2e554a7a5d8a8092ecb20c547734bb33fddd5046 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 3 Oct 2020 01:06:46 +0300
Subject: net: dsa: propagate switchdev vlan_filtering prepare phase to drivers

A driver may refuse to enable VLAN filtering for any reason beyond what
the DSA framework cares about, such as:
- having tc-flower rules that rely on the switch being VLAN-aware
- the particular switch does not support VLAN, even if the driver does
  (the DSA framework just checks for the presence of the .port_vlan_add
  and .port_vlan_del pointers)
- simply not supporting this configuration to be toggled at runtime

Currently, when a driver rejects a configuration it cannot support, it
does this from the commit phase, which triggers various warnings in
switchdev.

So propagate the prepare phase to drivers, to give them the ability to
refuse invalid configurations cleanly and avoid the warnings.

Since we need to modify all function prototypes and check for the
prepare phase from within the drivers, take that opportunity and move
the existing driver restrictions within the prepare phase where that is
possible and easy.

Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Woojung Huh <woojung.huh@microchip.com>
Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
Cc: Sean Wang <sean.wang@mediatek.com>
Cc: Landen Chao <Landen.Chao@mediatek.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Jonathan McDowell <noodles@earth.li>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h         | 3 ++-
 include/soc/mscc/ocelot.h | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index c0185660881c..35429a140dfa 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -552,7 +552,8 @@ struct dsa_switch_ops {
 	 * VLAN support
 	 */
 	int	(*port_vlan_filtering)(struct dsa_switch *ds, int port,
-				       bool vlan_filtering);
+				       bool vlan_filtering,
+				       struct switchdev_trans *trans);
 	int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
 				 const struct switchdev_obj_port_vlan *vlan);
 	void (*port_vlan_add)(struct dsa_switch *ds, int port,
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 46608494616f..1e9db9577441 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -730,8 +730,8 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,
 void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
 void ocelot_adjust_link(struct ocelot *ocelot, int port,
 			struct phy_device *phydev);
-void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
-				bool vlan_aware);
+int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
+			       struct switchdev_trans *trans);
 void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
 int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
 			    struct net_device *bridge);
-- 
cgit v1.2.3


From c6db31ffe202c3120147e9f3455a4dbc90546d39 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Mon, 5 Oct 2020 18:39:37 +0300
Subject: ethtool: allow netdev driver to define phy tunables

Define get/set phy tunable callbacks in ethtool ops.
This will allow MAC drivers with integrated PHY still to implement
these tunables.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 060b20f0b20f..6408b446051f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -505,6 +505,10 @@ struct ethtool_ops {
 				      struct ethtool_fecparam *);
 	void	(*get_ethtool_phy_stats)(struct net_device *,
 					 struct ethtool_stats *, u64 *);
+	int	(*get_phy_tunable)(struct net_device *,
+				   const struct ethtool_tunable *, void *);
+	int	(*set_phy_tunable)(struct net_device *,
+				   const struct ethtool_tunable *, const void *);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
-- 
cgit v1.2.3


From 451b05f413d3fd89ee84ae99a5029f619ed3cb78 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Mon, 5 Oct 2020 22:34:18 +0200
Subject: net: netdevice.h: sw_netstats_rx_add helper

some drivers/network protocols update rx bytes/packets under
u64_stats_update_begin/end sequence.
Add a specific helper like dev_lstats_add()

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d126e36580c9..a0df43b13839 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2543,6 +2543,16 @@ struct pcpu_lstats {
 
 void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes);
 
+static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len)
+{
+	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_bytes += len;
+	tstats->rx_packets++;
+	u64_stats_update_end(&tstats->syncp);
+}
+
 static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
 {
 	struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats);
-- 
cgit v1.2.3


From ddcf3b70c5ae8444e920d28e30e7ad4e866c8015 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 5 Oct 2020 15:07:37 -0700
Subject: netlink: create helpers for checking type is an int

There's a number of policies which check if type is a uint or sint.
Factor the checking against the list of value sizes to a helper
for easier reuse.

v2: - new patch

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 5a5ff97cc596..c5aa46f379bc 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -362,20 +362,21 @@ struct nla_policy {
 #define NLA_POLICY_BITFIELD32(valid) \
 	{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
 
+#define __NLA_IS_UINT_TYPE(tp)						\
+	(tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64)
+#define __NLA_IS_SINT_TYPE(tp)						\
+	(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
+
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
 #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
-	(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 ||	\
-		      tp == NLA_U32 || tp == NLA_U64 ||	\
+	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) ||	\
 		      tp == NLA_MSECS ||		\
 		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_SINT_TYPE(tp)			\
-	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16  ||	\
-		      tp == NLA_S32 || tp == NLA_S64) + tp)
+	(__NLA_ENSURE(__NLA_IS_SINT_TYPE(tp)) + tp)
 #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp)		\
-	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
-		      tp == NLA_S16 || tp == NLA_U16 ||	\
-		      tp == NLA_S32 || tp == NLA_U32 ||	\
-		      tp == NLA_S64 || tp == NLA_U64 ||	\
+	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) ||		\
+		      __NLA_IS_SINT_TYPE(tp) ||		\
 		      tp == NLA_MSECS ||		\
 		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_NO_VALIDATION_PTR(tp)		\
-- 
cgit v1.2.3


From bdbb4e29df8b790db50cb73ce25d23543329f05f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 5 Oct 2020 15:07:38 -0700
Subject: netlink: add mask validation

We don't have good validation policy for existing unsigned int attrs
which serve as flags (for new ones we could use NLA_BITFIELD32).
With increased use of policy dumping having the validation be
expressed as part of the policy is important. Add validation
policy in form of a mask of supported/valid bits.

Support u64 in the uAPI to be future-proof, but really for now
the embedded mask member can only hold 32 bits, so anything with
bit 32+ set will always fail validation.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h        | 10 ++++++++++
 include/uapi/linux/netlink.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index c5aa46f379bc..2b9e41075f19 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -200,6 +200,7 @@ enum nla_policy_validation {
 	NLA_VALIDATE_RANGE_WARN_TOO_LONG,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
+	NLA_VALIDATE_MASK,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
 };
@@ -317,6 +318,7 @@ struct nla_policy {
 	u16		len;
 	union {
 		const u32 bitfield32_valid;
+		const u32 mask;
 		const char *reject_message;
 		const struct nla_policy *nested_policy;
 		struct netlink_range_validation *range;
@@ -368,6 +370,8 @@ struct nla_policy {
 	(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
+#define NLA_ENSURE_UINT_TYPE(tp)			\
+	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp)
 #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) ||	\
 		      tp == NLA_MSECS ||		\
@@ -416,6 +420,12 @@ struct nla_policy {
 	.max = _max,					\
 }
 
+#define NLA_POLICY_MASK(tp, _mask) {			\
+	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_MASK,		\
+	.mask = _mask,					\
+}
+
 #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) {		\
 	.type = NLA_ENSURE_NO_VALIDATION_PTR(tp),	\
 	.validation_type = NLA_VALIDATE_FUNCTION,	\
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index eac8a6a648ea..d02e472ba54c 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -331,6 +331,7 @@ enum netlink_attribute_type {
  *	the index, if limited inside the nesting (U32)
  * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
  *	bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
  * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
  */
 enum netlink_policy_type_attr {
@@ -346,6 +347,7 @@ enum netlink_policy_type_attr {
 	NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
 	NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
 	NL_POLICY_TYPE_ATTR_PAD,
+	NL_POLICY_TYPE_ATTR_MASK,
 
 	/* keep last */
 	__NL_POLICY_TYPE_ATTR_MAX,
-- 
cgit v1.2.3


From eb88531bdbfaafb827192d1fc6c5a3fcc4fadd96 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sun, 27 Sep 2020 01:24:31 +0900
Subject: can: raw: add missing error queue support

Error queue are not yet implemented in CAN-raw sockets.

The problem: a userland call to recvmsg(soc, msg, MSG_ERRQUEUE) on a
CAN-raw socket would unqueue messages from the normal queue without
any kind of error or warning. As such, it prevented CAN drivers from
using the functionalities that relies on the error queue such as
skb_tx_timestamp().

SCM_CAN_RAW_ERRQUEUE is defined as the type for the CAN raw error
queue. SCM stands for "Socket control messages". The name is inspired
from SCM_J1939_ERRQUEUE of include/uapi/linux/can/j1939.h.

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20200926162527.270030-1-mailhol.vincent@wanadoo.fr
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/raw.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 6a11d308eb5c..3386aa81fdf2 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -49,6 +49,9 @@
 #include <linux/can.h>
 
 #define SOL_CAN_RAW (SOL_CAN_BASE + CAN_RAW)
+enum {
+	SCM_CAN_RAW_ERRQUEUE = 1,
+};
 
 /* for socket options affecting the socket (not the global system) */
 
-- 
cgit v1.2.3


From f55a52bb2cdbc5a92ca209d0ada90a490a188f58 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 3 Oct 2020 00:41:46 +0900
Subject: can: dev: fix type of get_can_dlc() and get_canfd_dlc() macros

The macros get_can_dlc() and get_canfd_dlc() are not visible in
userland. As such, type u8 should be preferred over type __u8.

Reference: https://lkml.org/lkml/2020/10/1/708
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20201002154219.4887-3-mailhol.vincent@wanadoo.fr
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index ed0482b2f4b2..f8975d0b90bb 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -84,13 +84,13 @@ struct can_priv {
 
 /*
  * get_can_dlc(value) - helper macro to cast a given data length code (dlc)
- * to __u8 and ensure the dlc value to be max. 8 bytes.
+ * to u8 and ensure the dlc value to be max. 8 bytes.
  *
  * To be used in the CAN netdriver receive path to ensure conformance with
  * ISO 11898-1 Chapter 8.4.2.3 (DLC field)
  */
-#define get_can_dlc(i)		(min_t(__u8, (i), CAN_MAX_DLC))
-#define get_canfd_dlc(i)	(min_t(__u8, (i), CANFD_MAX_DLC))
+#define get_can_dlc(i)		(min_t(u8, (i), CAN_MAX_DLC))
+#define get_canfd_dlc(i)	(min_t(u8, (i), CANFD_MAX_DLC))
 
 /* Check for outgoing skbs that have not been created by the CAN subsystem */
 static inline bool can_skb_headroom_valid(struct net_device *dev,
-- 
cgit v1.2.3


From 49f3d12b0f70ea867b891ad2a97f6e51bb564e18 Mon Sep 17 00:00:00 2001
From: Jakub Wilk <jwilk@jwilk.net>
Date: Wed, 7 Oct 2020 07:57:17 +0200
Subject: bpf: Fix typo in uapi/linux/bpf.h

Reported-by: Samanta Navarro <ferivoz@riseup.net>
Signed-off-by: Jakub Wilk <jwilk@jwilk.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201007055717.7319-1-jwilk@jwilk.net
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c446394135be..d83561e8cd2c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2253,7 +2253,7 @@ union bpf_attr {
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- *		if the verdeict eBPF program returns **SK_PASS**), redirect it
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
  *		to the socket referenced by *map* (of type
  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
  *		egress interfaces can be used for redirection. The
-- 
cgit v1.2.3


From 1c47fa6b31c2683f03bc2f9174902bb7dcd35d83 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 3 Oct 2020 00:41:49 +0900
Subject: can: dev: add a helper function to calculate the duration of one bit

Rename macro CAN_CALC_SYNC_SEG to CAN_SYNC_SEG and make it available
through include/linux/can/dev.h

Add an helper function can_bit_time() which returns the duration (in
time quanta) of one CAN bit.

Rationale for this patch: the sync segment and the bit time are two
concepts which are defined in the CAN ISO standard. Device drivers for
CAN might need those.

Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for
additional information.

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20201002154219.4887-6-mailhol.vincent@wanadoo.fr
[mkl: Let can_bit_time() return an unsinged int, make argument const]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index f8975d0b90bb..41ff31795320 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -82,6 +82,21 @@ struct can_priv {
 #endif
 };
 
+#define CAN_SYNC_SEG 1
+
+/*
+ * can_bit_time() - Duration of one bit
+ *
+ * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for
+ * additional information.
+ *
+ * Return: the number of time quanta in one bit.
+ */
+static inline unsigned int can_bit_time(const struct can_bittiming *bt)
+{
+	return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2;
+}
+
 /*
  * get_can_dlc(value) - helper macro to cast a given data length code (dlc)
  * to u8 and ensure the dlc value to be max. 8 bytes.
-- 
cgit v1.2.3


From e057dd3fc20ffb3d7f150af46542a51b59b90127 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 28 Sep 2020 22:04:04 +0200
Subject: can: add ISO 15765-2:2016 transport protocol

CAN Transport Protocols offer support for segmented Point-to-Point
communication between CAN nodes via two defined CAN Identifiers.
As CAN frames can only transport a small amount of data bytes
(max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this
segmentation is needed to transport longer PDUs as needed e.g. for
vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic.
This protocol driver implements data transfers according to
ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20200928200404.82229-1-socketcan@hartkopp.net
[mkl: Removed "WITH Linux-syscall-note" from isotp.c.
      Fixed indention, a checkpatch warning and typos.
      Replaced __u{8,32} by u{8,32}.
      Removed always false (optlen < 0) check in isotp_setsockopt().]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h | 166 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 include/uapi/linux/can/isotp.h

(limited to 'include')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
new file mode 100644
index 000000000000..553006509f4e
--- /dev/null
+++ b/include/uapi/linux/can/isotp.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * linux/can/isotp.h
+ *
+ * Definitions for isotp CAN sockets (ISO 15765-2:2016)
+ *
+ * Copyright (c) 2020 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#ifndef _UAPI_CAN_ISOTP_H
+#define _UAPI_CAN_ISOTP_H
+
+#include <linux/types.h>
+#include <linux/can.h>
+
+#define SOL_CAN_ISOTP (SOL_CAN_BASE + CAN_ISOTP)
+
+/* for socket options affecting the socket (not the global system) */
+
+#define CAN_ISOTP_OPTS		1	/* pass struct can_isotp_options */
+
+#define CAN_ISOTP_RECV_FC	2	/* pass struct can_isotp_fc_options */
+
+/* sockopts to force stmin timer values for protocol regression tests */
+
+#define CAN_ISOTP_TX_STMIN	3	/* pass __u32 value in nano secs    */
+					/* use this time instead of value   */
+					/* provided in FC from the receiver */
+
+#define CAN_ISOTP_RX_STMIN	4	/* pass __u32 value in nano secs   */
+					/* ignore received CF frames which */
+					/* timestamps differ less than val */
+
+#define CAN_ISOTP_LL_OPTS	5	/* pass struct can_isotp_ll_options */
+
+struct can_isotp_options {
+
+	__u32 flags;		/* set flags for isotp behaviour.	*/
+				/* __u32 value : flags see below	*/
+
+	__u32 frame_txtime;	/* frame transmission time (N_As/N_Ar)	*/
+				/* __u32 value : time in nano secs	*/
+
+	__u8  ext_address;	/* set address for extended addressing	*/
+				/* __u8 value : extended address	*/
+
+	__u8  txpad_content;	/* set content of padding byte (tx)	*/
+				/* __u8 value : content	on tx path	*/
+
+	__u8  rxpad_content;	/* set content of padding byte (rx)	*/
+				/* __u8 value : content	on rx path	*/
+
+	__u8  rx_ext_address;	/* set address for extended addressing	*/
+				/* __u8 value : extended address (rx)	*/
+};
+
+struct can_isotp_fc_options {
+
+	__u8  bs;		/* blocksize provided in FC frame	*/
+				/* __u8 value : blocksize. 0 = off	*/
+
+	__u8  stmin;		/* separation time provided in FC frame	*/
+				/* __u8 value :				*/
+				/* 0x00 - 0x7F : 0 - 127 ms		*/
+				/* 0x80 - 0xF0 : reserved		*/
+				/* 0xF1 - 0xF9 : 100 us - 900 us	*/
+				/* 0xFA - 0xFF : reserved		*/
+
+	__u8  wftmax;		/* max. number of wait frame transmiss.	*/
+				/* __u8 value : 0 = omit FC N_PDU WT	*/
+};
+
+struct can_isotp_ll_options {
+
+	__u8  mtu;		/* generated & accepted CAN frame type	*/
+				/* __u8 value :				*/
+				/* CAN_MTU   (16) -> standard CAN 2.0	*/
+				/* CANFD_MTU (72) -> CAN FD frame	*/
+
+	__u8  tx_dl;		/* tx link layer data length in bytes	*/
+				/* (configured maximum payload length)	*/
+				/* __u8 value : 8,12,16,20,24,32,48,64	*/
+				/* => rx path supports all LL_DL values */
+
+	__u8  tx_flags;		/* set into struct canfd_frame.flags	*/
+				/* at frame creation: e.g. CANFD_BRS	*/
+				/* Obsolete when the BRS flag is fixed	*/
+				/* by the CAN netdriver configuration	*/
+};
+
+/* flags for isotp behaviour */
+
+#define CAN_ISOTP_LISTEN_MODE	0x001	/* listen only (do not send FC) */
+#define CAN_ISOTP_EXTEND_ADDR	0x002	/* enable extended addressing */
+#define CAN_ISOTP_TX_PADDING	0x004	/* enable CAN frame padding tx path */
+#define CAN_ISOTP_RX_PADDING	0x008	/* enable CAN frame padding rx path */
+#define CAN_ISOTP_CHK_PAD_LEN	0x010	/* check received CAN frame padding */
+#define CAN_ISOTP_CHK_PAD_DATA	0x020	/* check received CAN frame padding */
+#define CAN_ISOTP_HALF_DUPLEX	0x040	/* half duplex error state handling */
+#define CAN_ISOTP_FORCE_TXSTMIN	0x080	/* ignore stmin from received FC */
+#define CAN_ISOTP_FORCE_RXSTMIN	0x100	/* ignore CFs depending on rx stmin */
+#define CAN_ISOTP_RX_EXT_ADDR	0x200	/* different rx extended addressing */
+#define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
+
+
+/* default values */
+
+#define CAN_ISOTP_DEFAULT_FLAGS		0
+#define CAN_ISOTP_DEFAULT_EXT_ADDRESS	0x00
+#define CAN_ISOTP_DEFAULT_PAD_CONTENT	0xCC /* prevent bit-stuffing */
+#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	0
+#define CAN_ISOTP_DEFAULT_RECV_BS	0
+#define CAN_ISOTP_DEFAULT_RECV_STMIN	0x00
+#define CAN_ISOTP_DEFAULT_RECV_WFTMAX	0
+
+#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
+#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
+#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
+
+/*
+ * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
+ *
+ * We can strongly assume, that the Linux Kernel implementation of
+ * CAN_ISOTP is capable to run with BS=0, STmin=0 and WFTmax=0.
+ * But as we like to be able to behave as a commonly available ECU,
+ * these default settings can be changed via sockopts.
+ * For that reason the STmin value is intentionally _not_ checked for
+ * consistency and copied directly into the flow control (FC) frame.
+ *
+ */
+
+#endif /* !_UAPI_CAN_ISOTP_H */
-- 
cgit v1.2.3


From ba6ff70a3bb76c1ff440d3a0044b82e97abb648f Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Sat, 3 Oct 2020 15:04:18 -0700
Subject: mac80211: copy configured beacon tx rate to driver

The user is allowed to change beacon tx rate (HT/VHT/HE) from hostapd.
This information needs to be passed to the driver when the rate control
is offloaded to the firmware. The driver capability of allowing beacon
rate is already validated in cfg80211, so simply passing the rate
information to the driver is enough.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1601762658-15627-1-git-send-email-rmanohar@codeaurora.org
[adjust commit message slightly]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4747d446179a..e8e295dae744 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -628,6 +628,8 @@ struct ieee80211_fils_discovery {
  * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response
  *	interval.
  * @s1g: BSS is S1G BSS (affects Association Request format).
+ * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed
+ *	to driver when rate control is offloaded to firmware.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -698,6 +700,7 @@ struct ieee80211_bss_conf {
 	struct ieee80211_fils_discovery fils_discovery;
 	u32 unsol_bcast_probe_resp_interval;
 	bool s1g;
+	struct cfg80211_bitrate_mask beacon_tx_rate;
 };
 
 /**
-- 
cgit v1.2.3


From 846e463a70e910f2a831aea19f9a361422a2ff5b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 7 Oct 2020 09:51:11 -0700
Subject: net/sched: get rid of qdisc->padded

kmalloc() of sufficiently big portion of memory is cache-aligned
in regular conditions. If some debugging options are used,
there is no reason qdisc structures would need 64-byte alignment
if most other kernel structures are not aligned.

This get rid of QDISC_ALIGN and QDISC_ALIGNTO.

Addition of privdata field will help implementing
the reverse of qdisc_priv() and documents where
the private data is.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Allen Pais <allen.lkml@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/pkt_sched.h   | 5 +----
 include/net/sch_generic.h | 5 ++++-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index ac8c890a2657..4ed32e6b0201 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -19,12 +19,9 @@ struct qdisc_walker {
 	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
 };
 
-#define QDISC_ALIGNTO		64
-#define QDISC_ALIGN(len)	(((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
-
 static inline void *qdisc_priv(struct Qdisc *q)
 {
-	return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
+	return &q->privdata;
 }
 
 /* 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6c762457122f..d8fd8676fc72 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -91,7 +91,7 @@ struct Qdisc {
 	struct net_rate_estimator __rcu *rate_est;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue	__percpu *cpu_qstats;
-	int			padded;
+	int			pad;
 	refcount_t		refcnt;
 
 	/*
@@ -112,6 +112,9 @@ struct Qdisc {
 	/* for NOLOCK qdisc, true if there are no enqueued skbs */
 	bool			empty;
 	struct rcu_head		rcu;
+
+	/* private data */
+	long privdata[] ____cacheline_aligned;
 };
 
 static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
-- 
cgit v1.2.3


From eca43ee6c46db92dd850ce659316b0680d70e137 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Fri, 9 Oct 2020 07:03:25 +0000
Subject: bpf: Add tcp_notsent_lowat bpf setsockopt

Adding support for TCP_NOTSENT_LOWAT sockoption (https://lwn.net/Articles/560082/)
in tcp bpf programs.

Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201009070325.226855-1-tehnerd@tehnerd.com
---
 include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d83561e8cd2c..42d2df799397 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1698,7 +1698,7 @@ union bpf_attr {
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
-- 
cgit v1.2.3


From ccdf07219da6bd1f43c6ddcde4c0e36993c7365a Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:43 +0300
Subject: devlink: Add reload action option to devlink reload command

Add devlink reload action to allow the user to request a specific reload
action. The action parameter is optional, if not specified then devlink
driver re-init action is used (backward compatible).
Note that when required to do firmware activation some drivers may need
to reload the driver. On the other hand some drivers may need to reset
the firmware to reinitialize the driver entities. Therefore, the devlink
reload command returns the actions which were actually performed.
Reload actions supported are:
driver_reinit: driver entities re-initialization, applying devlink-param
               and devlink-resource values.
fw_activate: firmware activate.

command examples:
$devlink dev reload pci/0000:82:00.0 action driver_reinit
reload_actions_performed:
  driver_reinit

$devlink dev reload pci/0000:82:00.0 action fw_activate
reload_actions_performed:
  driver_reinit fw_activate

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        |  7 ++++---
 include/uapi/linux/devlink.h | 13 +++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 237ba5e29a3b..93c535ae5a4b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1150,10 +1150,11 @@ struct devlink_ops {
 	 * implemementation.
 	 */
 	u32 supported_flash_update_params;
+	unsigned long reload_actions;
 	int (*reload_down)(struct devlink *devlink, bool netns_change,
-			   struct netlink_ext_ack *extack);
-	int (*reload_up)(struct devlink *devlink,
-			 struct netlink_ext_ack *extack);
+			   enum devlink_reload_action action, struct netlink_ext_ack *extack);
+	int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
+			 u32 *actions_performed, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 5f1d6c327670..74bdad252c36 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -301,6 +301,16 @@ enum {
 	DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE,
 };
 
+enum devlink_reload_action {
+	DEVLINK_RELOAD_ACTION_UNSPEC,
+	DEVLINK_RELOAD_ACTION_DRIVER_REINIT,	/* Driver entities re-instantiation */
+	DEVLINK_RELOAD_ACTION_FW_ACTIVATE,	/* FW activate */
+
+	/* Add new reload actions above */
+	__DEVLINK_RELOAD_ACTION_MAX,
+	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -493,6 +503,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
 	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
 
+	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
+	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From dc64cc7c63102ac78bac3cfbc00ef3abd7a3fdf3 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:44 +0300
Subject: devlink: Add devlink reload limit option

Add reload limit to demand restrictions on reload actions.
Reload limits supported:
no_reset: No reset allowed, no down time allowed, no link flap and no
          configuration is lost.

By default reload limit is unspecified and so no constraints on reload
actions are required.

Some combinations of action and limit are invalid. For example, driver
can not reinitialize its entities without any downtime.

The no_reset reload limit will have usecase in this patchset to
implement restricted fw_activate on mlx5.

Have the uapi parameter of reload limit ready for future support of
multiselection.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        |  8 ++++++--
 include/uapi/linux/devlink.h | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 93c535ae5a4b..9f5c37c391f8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1151,10 +1151,14 @@ struct devlink_ops {
 	 */
 	u32 supported_flash_update_params;
 	unsigned long reload_actions;
+	unsigned long reload_limits;
 	int (*reload_down)(struct devlink *devlink, bool netns_change,
-			   enum devlink_reload_action action, struct netlink_ext_ack *extack);
+			   enum devlink_reload_action action,
+			   enum devlink_reload_limit limit,
+			   struct netlink_ext_ack *extack);
 	int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
-			 u32 *actions_performed, struct netlink_ext_ack *extack);
+			 enum devlink_reload_limit limit, u32 *actions_performed,
+			 struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 74bdad252c36..82a5e66c1518 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -311,6 +311,19 @@ enum devlink_reload_action {
 	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
 };
 
+enum devlink_reload_limit {
+	DEVLINK_RELOAD_LIMIT_UNSPEC,	/* unspecified, no constraints */
+	DEVLINK_RELOAD_LIMIT_NO_RESET,	/* No reset allowed, no down time allowed,
+					 * no link flap and no configuration is lost.
+					 */
+
+	/* Add new reload limit above */
+	__DEVLINK_RELOAD_LIMIT_MAX,
+	DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1
+};
+
+#define DEVLINK_RELOAD_LIMITS_VALID_MASK (BIT(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -505,6 +518,7 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
 	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From a254c264267e8746fb257806c166e54375cf9c06 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:45 +0300
Subject: devlink: Add reload stats

Add reload stats to hold the history per reload action type and limit.

For example, the number of times fw_activate has been performed on this
device since the driver module was added or if the firmware activation
was performed with or without reset.

Add devlink notification on stats update.

Expose devlink reload stats to the user through devlink dev get command.

Examples:
$ devlink dev show
pci/0000:82:00.0:
  stats:
      reload:
        driver_reinit 2 fw_activate 1 fw_activate_no_reset 0
pci/0000:82:00.1:
  stats:
      reload:
        driver_reinit 1 fw_activate 0 fw_activate_no_reset 0

$ devlink dev show -jp
{
    "dev": {
        "pci/0000:82:00.0": {
            "stats": {
                "reload": {
                    "driver_reinit": 2,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                }
            }
        },
        "pci/0000:82:00.1": {
            "stats": {
                "reload": {
                    "driver_reinit": 1,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                }
            }
        }
    }
}

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        | 8 ++++++++
 include/uapi/linux/devlink.h | 6 ++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 9f5c37c391f8..d091c6ba82ce 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -20,6 +20,13 @@
 #include <uapi/linux/devlink.h>
 #include <linux/xarray.h>
 
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+	(__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+	u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
 struct devlink_ops;
 
 struct devlink {
@@ -38,6 +45,7 @@ struct devlink {
 	struct list_head trap_policer_list;
 	const struct devlink_ops *ops;
 	struct xarray snapshot_ids;
+	struct devlink_dev_stats stats;
 	struct device *dev;
 	possible_net_t _net;
 	struct mutex lock; /* Serializes access to devlink instance specific objects such as
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 82a5e66c1518..ab15fc597b74 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -520,6 +520,12 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
 	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
 
+	DEVLINK_ATTR_DEV_STATS,			/* nested */
+	DEVLINK_ATTR_RELOAD_STATS,		/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
+	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
-- 
cgit v1.2.3


From 77069ba2e3adf48c472fbbd9cbd7a4f5370b17df Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:46 +0300
Subject: devlink: Add remote reload stats

Add remote reload stats to hold the history of actions performed due
devlink reload commands initiated by remote host. For example, in case
firmware activation with reset finished successfully but was initiated
by remote host.

The function devlink_remote_reload_actions_performed() is exported to
enable drivers update on remote reload actions performed as it was not
initiated by their own devlink instance.

Expose devlink remote reload stats to the user through devlink dev get
command.

Examples:
$ devlink dev show
pci/0000:82:00.0:
  stats:
      reload:
        driver_reinit 2 fw_activate 1 fw_activate_no_reset 0
      remote_reload:
        driver_reinit 0 fw_activate 0 fw_activate_no_reset 0
pci/0000:82:00.1:
  stats:
      reload:
        driver_reinit 1 fw_activate 0 fw_activate_no_reset 0
      remote_reload:
        driver_reinit 1 fw_activate 1 fw_activate_no_reset 0

$ devlink dev show -jp
{
    "dev": {
        "pci/0000:82:00.0": {
            "stats": {
                "reload": {
                    "driver_reinit": 2,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                },
                "remote_reload": {
                    "driver_reinit": 0,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                }
            }
        },
        "pci/0000:82:00.1": {
            "stats": {
                "reload": {
                    "driver_reinit": 1,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                },
                "remote_reload": {
                    "driver_reinit": 1,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                }
            }
        }
    }
}

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        | 4 ++++
 include/uapi/linux/devlink.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d091c6ba82ce..d2771e57a278 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -25,6 +25,7 @@
 
 struct devlink_dev_stats {
 	u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+	u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
 };
 
 struct devlink_ops;
@@ -1567,6 +1568,9 @@ void
 devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
 
 bool devlink_is_reload_failed(const struct devlink *devlink);
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+					     enum devlink_reload_limit limit,
+					     u32 actions_performed);
 
 void devlink_flash_update_begin_notify(struct devlink *devlink);
 void devlink_flash_update_end_notify(struct devlink *devlink);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ab15fc597b74..0113bc4db9f5 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -525,6 +525,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
 	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
 	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+	DEVLINK_ATTR_REMOTE_RELOAD_STATS,	/* nested */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
-- 
cgit v1.2.3


From 38b9f903f22b9baa5c4b9bfb07c8bbc49f5efbba Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:49 +0300
Subject: net/mlx5: Handle sync reset request event

Once the driver gets sync_reset_request from firmware it prepares for the
coming reset and sends acknowledge.
After getting this event the driver expects device reset, either it will
trigger PCI reset on sync_reset_now event or such PCI reset will be
triggered by another PF of the same device. So it moves to reset
requested mode and if it gets PCI reset triggered by the other PF it
detect the reset and reloads.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 239e5348ee09..add85094f9a5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -501,6 +501,7 @@ struct mlx5_mpfs;
 struct mlx5_eswitch;
 struct mlx5_lag;
 struct mlx5_devcom;
+struct mlx5_fw_reset;
 struct mlx5_eq_table;
 struct mlx5_irq_table;
 
@@ -578,6 +579,7 @@ struct mlx5_priv {
 	struct mlx5_core_sriov	sriov;
 	struct mlx5_lag		*lag;
 	struct mlx5_devcom	*devcom;
+	struct mlx5_fw_reset	*fw_reset;
 	struct mlx5_core_roce	roce;
 	struct mlx5_fc_stats		fc_stats;
 	struct mlx5_rl_table            rl_table;
-- 
cgit v1.2.3


From 195d9dece1686576ad1c7b45942b5cf9eacb3fbf Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:53 +0300
Subject: devlink: Add enable_remote_dev_reset generic parameter

The enable_remote_dev_reset devlink param flags that the host admin
allows device resets that can be initiated by other hosts. This
parameter is useful for setups where a device is shared by different
hosts, such as multi-host setup. Once the user set this parameter to
false, the driver should NACK any attempt to reset the device while the
driver is loaded.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d2771e57a278..b01bb9bca5a2 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -469,6 +469,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
 	DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+	DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -506,6 +507,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce"
 #define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset"
+#define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From 2d69356752ff862dbb0c7e6725874740799d7708 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:55 +0300
Subject: net/mlx5: Add support for fw live patch event

Firmware live patch event notifies the driver that the firmware was just
updated using live patch. In such case the driver should not reload or
re-initiate entities, part to updating the firmware version and
re-initiate the firmware tracer which can be updated by live patch with
new strings database to help debugging an issue.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 81ca5989009b..cf824366a7d1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -366,6 +366,7 @@ enum {
 enum {
 	MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
 	MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
+	MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT = 0x7,
 	MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT = 0x8,
 };
 
-- 
cgit v1.2.3


From 923527dcb4d164925a2fed0b53c6a1625a60a472 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 8 Oct 2020 22:49:00 -0700
Subject: net/tls: remove a duplicate function prototype

Remove one of the two instances of the function prototype for
tls_validate_xmit_skb().

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Boris Pismenny <borisp@nvidia.com>
Cc: Aviad Yehezkel <aviadye@nvidia.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index e5dac7e74e79..baf1e99d8193 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -679,10 +679,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
 
-struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
-				      struct net_device *dev,
-				      struct sk_buff *skb);
-
 int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
-- 
cgit v1.2.3


From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 8 Oct 2020 12:45:17 +0200
Subject: netlink: export policy in extended ACK

Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK
to advertise the policy, e.g. if an attribute was out of range,
you'll know the range that's permissible.

Add new NL_SET_ERR_MSG_ATTR_POL() and NL_SET_ERR_MSG_ATTR_POL()
macros to set this, since realistically it's only useful to do
this when the bad attribute (offset) is also returned.

Use it in lib/nlattr.c which practically does all the policy
validation.

v2:
 - add and use netlink_policy_dump_attr_size_estimate()
v3:
 - remove redundant break
v4:
 - really remove redundant break ... sorry

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h      | 30 ++++++++++++++++++++----------
 include/net/netlink.h        |  4 ++++
 include/uapi/linux/netlink.h |  2 ++
 3 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e3e49f0e5c13..666cd0390699 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,12 +68,14 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
  * @_msg: message string to report - don't access directly, use
  *	%NL_SET_ERR_MSG
  * @bad_attr: attribute with error
+ * @policy: policy for a bad attribute
  * @cookie: cookie data to return to userspace (for success)
  * @cookie_len: actual cookie data length
  */
 struct netlink_ext_ack {
 	const char *_msg;
 	const struct nlattr *bad_attr;
+	const struct nla_policy *policy;
 	u8 cookie[NETLINK_MAX_COOKIE_LEN];
 	u8 cookie_len;
 };
@@ -95,21 +97,29 @@ struct netlink_ext_ack {
 #define NL_SET_ERR_MSG_MOD(extack, msg)			\
 	NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
 
-#define NL_SET_BAD_ATTR(extack, attr) do {		\
-	if ((extack))					\
+#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do {	\
+	if ((extack)) {					\
 		(extack)->bad_attr = (attr);		\
+		(extack)->policy = (pol);		\
+	}						\
 } while (0)
 
-#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {	\
-	static const char __msg[] = msg;		\
-	struct netlink_ext_ack *__extack = (extack);	\
-							\
-	if (__extack) {					\
-		__extack->_msg = __msg;			\
-		__extack->bad_attr = (attr);		\
-	}						\
+#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL)
+
+#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do {	\
+	static const char __msg[] = msg;			\
+	struct netlink_ext_ack *__extack = (extack);		\
+								\
+	if (__extack) {						\
+		__extack->_msg = __msg;				\
+		__extack->bad_attr = (attr);			\
+		__extack->policy = (pol);			\
+	}							\
 } while (0)
 
+#define NL_SET_ERR_MSG_ATTR(extack, attr, msg)		\
+	NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg)
+
 static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 					    u64 cookie)
 {
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2b9e41075f19..7356f41d23ba 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1957,6 +1957,10 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
 bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
 int netlink_policy_dump_write(struct sk_buff *skb,
 			      struct netlink_policy_dump_state *state);
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt);
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+				   const struct nla_policy *pt,
+				   int nestattr);
 void netlink_policy_dump_free(struct netlink_policy_dump_state *state);
 
 #endif
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index d02e472ba54c..c3816ff7bfc3 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -129,6 +129,7 @@ struct nlmsgerr {
  * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
  *	be used - in the success case - to identify a created
  *	object or operation or similar (binary)
+ * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute
  * @__NLMSGERR_ATTR_MAX: number of attributes
  * @NLMSGERR_ATTR_MAX: highest attribute number
  */
@@ -137,6 +138,7 @@ enum nlmsgerr_attrs {
 	NLMSGERR_ATTR_MSG,
 	NLMSGERR_ATTR_OFFS,
 	NLMSGERR_ATTR_COOKIE,
+	NLMSGERR_ATTR_POLICY,
 
 	__NLMSGERR_ATTR_MAX,
 	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
-- 
cgit v1.2.3


From dd2ce6a5373c6f5c830be54be10775458a8bd312 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:01 +0200
Subject: bpf: Improve bpf_redirect_neigh helper description

Follow-up to address David's feedback that we should better describe internals
of the bpf_redirect_neigh() helper.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Link: https://lore.kernel.org/bpf/20201010234006.7075-2-daniel@iogearbox.net
---
 include/uapi/linux/bpf.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42d2df799397..4272cc53d478 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3679,10 +3679,14 @@ union bpf_attr {
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
- * 		fills in e.g. MAC addresses based on the L3 information from
- * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		populates L2 addresses as well, meaning, internally, the helper
+ * 		performs a FIB lookup based on the skb's networking header to
+ * 		get the address of the next hop and then relies on the neighbor
+ * 		lookup for the L2 address of the nexthop.
+ *
  * 		The *flags* argument is reserved and must be 0. The helper is
- * 		currently only supported for tc BPF program types.
+ * 		currently only supported for tc BPF program types, and enabled
+ * 		for IPv4 and IPv6 protocols.
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
-- 
cgit v1.2.3


From 9aa1206e8f48222f35a0c809f33b2f4aaa1e2661 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:02 +0200
Subject: bpf: Add redirect_peer helper

Add an efficient ingress to ingress netns switch that can be used out of tc BPF
programs in order to redirect traffic from host ns ingress into a container
veth device ingress without having to go via CPU backlog queue [0]. For local
containers this can also be utilized and path via CPU backlog queue only needs
to be taken once, not twice. On a high level this borrows from ipvlan which does
similar switch in __netif_receive_skb_core() and then iterates via another_round.
This helps to reduce latency for mentioned use cases.

Pod to remote pod with redirect(), TCP_RR [1]:

  # percpu_netperf 10.217.1.33
          RT_LATENCY:         122.450         (per CPU:         122.666         122.401         122.333         122.401 )
        MEAN_LATENCY:         121.210         (per CPU:         121.100         121.260         121.320         121.160 )
      STDDEV_LATENCY:         120.040         (per CPU:         119.420         119.910         125.460         115.370 )
         MIN_LATENCY:          46.500         (per CPU:          47.000          47.000          47.000          45.000 )
         P50_LATENCY:         118.500         (per CPU:         118.000         119.000         118.000         119.000 )
         P90_LATENCY:         127.500         (per CPU:         127.000         128.000         127.000         128.000 )
         P99_LATENCY:         130.750         (per CPU:         131.000         131.000         129.000         132.000 )

    TRANSACTION_RATE:       32666.400         (per CPU:        8152.200        8169.842        8174.439        8169.897 )

Pod to remote pod with redirect_peer(), TCP_RR:

  # percpu_netperf 10.217.1.33
          RT_LATENCY:          44.449         (per CPU:          43.767          43.127          45.279          45.622 )
        MEAN_LATENCY:          45.065         (per CPU:          44.030          45.530          45.190          45.510 )
      STDDEV_LATENCY:          84.823         (per CPU:          66.770          97.290          84.380          90.850 )
         MIN_LATENCY:          33.500         (per CPU:          33.000          33.000          34.000          34.000 )
         P50_LATENCY:          43.250         (per CPU:          43.000          43.000          43.000          44.000 )
         P90_LATENCY:          46.750         (per CPU:          46.000          47.000          47.000          47.000 )
         P99_LATENCY:          52.750         (per CPU:          51.000          54.000          53.000          53.000 )

    TRANSACTION_RATE:       90039.500         (per CPU:       22848.186       23187.089       22085.077       21919.130 )

  [0] https://linuxplumbersconf.org/event/7/contributions/674/attachments/568/1002/plumbers_2020_cilium_load_balancer.pdf
  [1] https://github.com/borkmann/netperf_scripts/blob/master/percpu_netperf

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201010234006.7075-3-daniel@iogearbox.net
---
 include/linux/netdevice.h |  4 ++++
 include/uapi/linux/bpf.h  | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28cfa53daf72..0533f86018dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1277,6 +1277,9 @@ struct netdev_net_notifier {
  * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
  *			 int cmd);
  *	Add, change, delete or get information on an IPv4 tunnel.
+ * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+ *	If a device is paired with a peer device, return the peer instance.
+ *	The caller must be under RCU read context.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1484,6 +1487,7 @@ struct net_device_ops {
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
+	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 };
 
 /**
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4272cc53d478..b97bc5abb3b8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3719,6 +3719,22 @@ union bpf_attr {
  *		never return NULL.
  *	Return
  *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_redirect**\ (), except
+ * 		that the redirection happens to the *ifindex*' peer device and
+ * 		the netns switch takes place from ingress to ingress without
+ * 		going through the CPU's backlog queue.
+ *
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types at the ingress
+ * 		hook and for veth device types. The peer device must reside in a
+ * 		different network namespace.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3876,6 +3892,7 @@ union bpf_attr {
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 4a8f87e60f6db40e640f1db555d063b2c4dea5f1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:03 +0200
Subject: bpf: Allow for map-in-map with dynamic inner array map entries

Recent work in f4d05259213f ("bpf: Add map_meta_equal map ops") and 134fede4eecf
("bpf: Relax max_entries check for most of the inner map types") added support
for dynamic inner max elements for most map-in-map types. Exceptions were maps
like array or prog array where the map_gen_lookup() callback uses the maps'
max_entries field as a constant when emitting instructions.

We recently implemented Maglev consistent hashing into Cilium's load balancer
which uses map-in-map with an outer map being hash and inner being array holding
the Maglev backend table for each service. This has been designed this way in
order to reduce overall memory consumption given the outer hash map allows to
avoid preallocating a large, flat memory area for all services. Also, the
number of service mappings is not always known a-priori.

The use case for dynamic inner array map entries is to further reduce memory
overhead, for example, some services might just have a small number of back
ends while others could have a large number. Right now the Maglev backend table
for small and large number of backends would need to have the same inner array
map entries which adds a lot of unneeded overhead.

Dynamic inner array map entries can be realized by avoiding the inlined code
generation for their lookup. The lookup will still be efficient since it will
be calling into array_map_lookup_elem() directly and thus avoiding retpoline.
The patch adds a BPF_F_INNER_MAP flag to map creation which therefore skips
inline code generation and relaxes array_map_meta_equal() check to ignore both
maps' max_entries. This also still allows to have faster lookups for map-in-map
when BPF_F_INNER_MAP is not specified and hence dynamic max_entries not needed.

Example code generation where inner map is dynamic sized array:

  # bpftool p d x i 125
  int handle__sys_enter(void * ctx):
  ; int handle__sys_enter(void *ctx)
     0: (b4) w1 = 0
  ; int key = 0;
     1: (63) *(u32 *)(r10 -4) = r1
     2: (bf) r2 = r10
  ;
     3: (07) r2 += -4
  ; inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
     4: (18) r1 = map[id:468]
     6: (07) r1 += 272
     7: (61) r0 = *(u32 *)(r2 +0)
     8: (35) if r0 >= 0x3 goto pc+5
     9: (67) r0 <<= 3
    10: (0f) r0 += r1
    11: (79) r0 = *(u64 *)(r0 +0)
    12: (15) if r0 == 0x0 goto pc+1
    13: (05) goto pc+1
    14: (b7) r0 = 0
    15: (b4) w6 = -1
  ; if (!inner_map)
    16: (15) if r0 == 0x0 goto pc+6
    17: (bf) r2 = r10
  ;
    18: (07) r2 += -4
  ; val = bpf_map_lookup_elem(inner_map, &key);
    19: (bf) r1 = r0                               | No inlining but instead
    20: (85) call array_map_lookup_elem#149280     | call to array_map_lookup_elem()
  ; return val ? *val : -1;                        | for inner array lookup.
    21: (15) if r0 == 0x0 goto pc+1
  ; return val ? *val : -1;
    22: (61) r6 = *(u32 *)(r0 +0)
  ; }
    23: (bc) w0 = w6
    24: (95) exit

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201010234006.7075-4-daniel@iogearbox.net
---
 include/linux/bpf.h      | 2 +-
 include/uapi/linux/bpf.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dc63eeed4fd9..2b16bf48aab6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -82,7 +82,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
-	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b97bc5abb3b8..bf5a99d803e4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -435,6 +435,9 @@ enum {
 
 /* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };
 
 /* Flags for BPF_PROG_QUERY. */
-- 
cgit v1.2.3


From 60a3815da702fd9e4759945f26cce5c47d3967ad Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 8 Oct 2020 01:14:47 +0200
Subject: netfilter: add inet ingress support

This patch adds the NF_INET_INGRESS pseudohook for the NFPROTO_INET
family. This is a mapping this new hook to the existing NFPROTO_NETDEV
and NF_NETDEV_INGRESS hook. The hook does not guarantee that packets are
inet only, users must filter out non-ip traffic explicitly.

This infrastructure makes it easier to support this new hook in nf_tables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ca9e63d6e0e4..6a6179af0d7c 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -45,6 +45,7 @@ enum nf_inet_hooks {
 	NF_INET_FORWARD,
 	NF_INET_LOCAL_OUT,
 	NF_INET_POST_ROUTING,
+	NF_INET_INGRESS,
 	NF_INET_NUMHOOKS
 };
 
-- 
cgit v1.2.3


From d3519cb89f6d5949481afa5de3ee0fc6a051e231 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 8 Oct 2020 01:14:48 +0200
Subject: netfilter: nf_tables: add inet ingress support

This patch adds a new ingress hook for the inet family. The inet ingress
hook emulates the IP receive path code, therefore, unclean packets are
drop before walking over the ruleset in this basechain.

This patch also introduces the nft_base_chain_netdev() helper function
to check if this hook is bound to one or more devices (through the hook
list infrastructure). This check allows to perform the same handling for
the inet ingress as it would be a netdev ingress chain from the control
plane.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h      |  6 +++++
 include/net/netfilter/nf_tables_ipv4.h | 33 ++++++++++++++++++++++++
 include/net/netfilter/nf_tables_ipv6.h | 46 ++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0bd2a081ae39..3965ce18226f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1081,6 +1081,12 @@ struct nft_table {
 	u8				*udata;
 };
 
+static inline bool nft_base_chain_netdev(int family, u32 hooknum)
+{
+	return family == NFPROTO_NETDEV ||
+	       (family == NFPROTO_INET && hooknum == NF_INET_INGRESS);
+}
+
 void nft_register_chain_type(const struct nft_chain_type *);
 void nft_unregister_chain_type(const struct nft_chain_type *);
 
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index ed7b511f0a59..1f7bea39ad1b 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -53,4 +53,37 @@ static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
 		nft_set_pktinfo_unspec(pkt, skb);
 }
 
+static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt,
+					       struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	u32 len, thoff;
+
+	if (!pskb_may_pull(skb, sizeof(*iph)))
+		return -1;
+
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error;
+
+	len = ntohs(iph->tot_len);
+	thoff = iph->ihl * 4;
+	if (skb->len < len) {
+		__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
+		return -1;
+	} else if (len < thoff) {
+		goto inhdr_error;
+	}
+
+	pkt->tprot_set = true;
+	pkt->tprot = iph->protocol;
+	pkt->xt.thoff = thoff;
+	pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+
+	return 0;
+
+inhdr_error:
+	__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS);
+	return -1;
+}
 #endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index d0f1c537b017..867de29f3f7a 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -70,4 +70,50 @@ static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 		nft_set_pktinfo_unspec(pkt, skb);
 }
 
+static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt,
+					       struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	unsigned int flags = IP6_FH_F_AUTH;
+	unsigned short frag_off;
+	unsigned int thoff = 0;
+	struct inet6_dev *idev;
+	struct ipv6hdr *ip6h;
+	int protohdr;
+	u32 pkt_len;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+		return -1;
+
+	ip6h = ipv6_hdr(skb);
+	if (ip6h->version != 6)
+		goto inhdr_error;
+
+	pkt_len = ntohs(ip6h->payload_len);
+	if (pkt_len + sizeof(*ip6h) > skb->len) {
+		idev = __in6_dev_get(nft_in(pkt));
+		__IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+		return -1;
+	}
+
+	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
+	if (protohdr < 0)
+		goto inhdr_error;
+
+	pkt->tprot_set = true;
+	pkt->tprot = protohdr;
+	pkt->xt.thoff = thoff;
+	pkt->xt.fragoff = frag_off;
+
+	return 0;
+
+inhdr_error:
+	idev = __in6_dev_get(nft_in(pkt));
+	__IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INHDRERRORS);
+	return -1;
+#else
+	return -1;
+#endif
+}
+
 #endif
-- 
cgit v1.2.3


From ef5659280eb13e8ac31c296f58cfdfa1684ac06b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 10 Oct 2020 22:09:38 -0700
Subject: bpf, sockmap: Allow skipping sk_skb parser program

Currently, we often run with a nop parser namely one that just does
this, 'return skb->len'. This happens when either our verdict program
can handle streaming data or it is only looking at socket data such
as IP addresses and other metadata associated with the flow. The second
case is common for a L3/L4 proxy for instance.

So lets allow loading programs without the parser then we can skip
the stream parser logic and avoid having to add a BPF program that
is effectively a nop.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/160239297866.8495.13345662302749219672.stgit@john-Precision-5820-Tower
---
 include/linux/skmsg.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 3119928fc103..fec0c5ac1c4f 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 
 int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 			 struct sk_msg *msg);
-- 
cgit v1.2.3


From ac911bfeb34b5d79fb4e23a08b8db0b89c529b53 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Oct 2020 09:43:53 +0200
Subject: can: isotp: implement cleanups / improvements from review

As pointed out by Jakub Kicinski here:
http://lore.kernel.org/r/20201009175751.5c54097f@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com
this patch addresses the remarked issues:

- remove empty line in comment
- remove default=y for CAN_ISOTP in Kconfig
- make use of pr_notice_once()
- use GFP_ATOMIC instead of gfp_any() in soft hrtimer context

The version strings in the CAN subsystem are removed by a separate patch.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20201012074354.25839-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 553006509f4e..7793b26aa154 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -160,7 +160,6 @@ struct can_isotp_ll_options {
  * these default settings can be changed via sockopts.
  * For that reason the STmin value is intentionally _not_ checked for
  * consistency and copied directly into the flow control (FC) frame.
- *
  */
 
 #endif /* !_UAPI_CAN_ISOTP_H */
-- 
cgit v1.2.3


From f726f3d37163f714034aa5fd1f92a1a73df4297f Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Oct 2020 09:43:54 +0200
Subject: can: remove obsolete version strings

As pointed out by Jakub Kicinski here:
http://lore.kernel.org/r/20201009175751.5c54097f@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com
this patch removes the obsolete version information of the different
CAN protocols and the AF_CAN core module.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20201012074354.25839-2-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h | 7 -------
 include/net/netns/can.h  | 1 -
 2 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 7da9f1f82e8e..5fb8d0e3f9c1 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -18,13 +18,6 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
-#define CAN_VERSION "20170425"
-
-/* increment this number each time you change some user-space interface */
-#define CAN_ABI_VERSION "9"
-
-#define CAN_VERSION_STRING "rev " CAN_VERSION " abi " CAN_ABI_VERSION
-
 #define DNAME(dev) ((dev) ? (dev)->name : "any")
 
 /**
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index b6ab7d1530d7..52fbd8291a96 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -15,7 +15,6 @@ struct can_rcv_lists_stats;
 struct netns_can {
 #if IS_ENABLED(CONFIG_PROC_FS)
 	struct proc_dir_entry *proc_dir;
-	struct proc_dir_entry *pde_version;
 	struct proc_dir_entry *pde_stats;
 	struct proc_dir_entry *pde_reset_stats;
 	struct proc_dir_entry *pde_rcvlist_all;
-- 
cgit v1.2.3


From ee92e4f1f95eb7b8820299f10fc5fba16d85cece Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Wed, 8 Apr 2020 14:47:39 -0500
Subject: net/mlx5: Add NIC TX domain namespace

Add new namespace that represents the NIC TX domain.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 92d991d93757..846d94ad04bc 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -76,6 +76,7 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_SNIFFER_RX,
 	MLX5_FLOW_NAMESPACE_SNIFFER_TX,
 	MLX5_FLOW_NAMESPACE_EGRESS,
+	MLX5_FLOW_NAMESPACE_EGRESS_KERNEL,
 	MLX5_FLOW_NAMESPACE_RDMA_RX,
 	MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
 	MLX5_FLOW_NAMESPACE_RDMA_TX,
-- 
cgit v1.2.3


From 9b9d454ddbf0c41391ed68ea82bc3d8ff6a65074 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Fri, 5 Jun 2020 20:17:51 -0500
Subject: net/mlx5e: IPsec: Add TX steering rule per IPsec state

Add new FTE in TX IPsec FT per IPsec state. It has the
same matching criteria as the RX steering rule.

The IPsec FT is created/destroyed when the first/last rule
is added/deleted respectively.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/qp.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 36492a1342cf..d75ef8aa8fac 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -245,6 +245,10 @@ enum {
 	MLX5_ETH_WQE_SWP_OUTER_L4_UDP   = 1 << 5,
 };
 
+enum {
+	MLX5_ETH_WQE_FT_META_IPSEC = BIT(0),
+};
+
 struct mlx5_wqe_eth_seg {
 	u8              swp_outer_l4_offset;
 	u8              swp_outer_l3_offset;
@@ -253,7 +257,7 @@ struct mlx5_wqe_eth_seg {
 	u8              cs_flags;
 	u8              swp_flags;
 	__be16          mss;
-	__be32          rsvd2;
+	__be32          flow_table_metadata;
 	union {
 		struct {
 			__be16 sz;
-- 
cgit v1.2.3


From 0d9826bc18ce356e8909919ad681ad65d0a6061e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 12 Oct 2020 17:06:06 +0200
Subject: netfilter: nf_log: missing vlan offload tag and proto

Dump vlan tag and proto for the usual vlan offload case if the
NF_LOG_MACDECODE flag is set on. Without this information the logging is
misleading as there is no reference to the VLAN header.

[12716.993704] test: IN=veth0 OUT= MACSRC=86:6c:92:ea:d6:73 MACDST=0e:3b:eb:86:73:76 VPROTO=8100 VID=10 MACPROTO=0800 SRC=192.168.10.2 DST=172.217.168.163 LEN=52 TOS=0x00 PREC=0x00 TTL=64 ID=2548 DF PROTO=TCP SPT=55848 DPT=80 WINDOW=501 RES=0x00 ACK FIN URGP=0
[12721.157643] test: IN=veth0 OUT= MACSRC=86:6c:92:ea:d6:73 MACDST=0e:3b:eb:86:73:76 VPROTO=8100 VID=10 MACPROTO=0806 ARP HTYPE=1 PTYPE=0x0800 OPCODE=2 MACSRC=86:6c:92:ea:d6:73 IPSRC=192.168.10.2 MACDST=0e:3b:eb:86:73:76 IPDST=192.168.10.1

Fixes: 83e96d443b37 ("netfilter: log: split family specific code to nf_log_{ip,ip6,common}.c files")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_log.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 0d3920896d50..716db4a0fed8 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   unsigned int logflags);
 void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
 			    struct sock *sk);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,
-- 
cgit v1.2.3


From 44fa32f008ab7092842095a7a474c48303a2f186 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 12 Oct 2020 10:01:27 +0200
Subject: net: add function dev_fetch_sw_netstats for fetching pcpu_sw_netstats

In several places the same code is used to populate rtnl_link_stats64
fields with data from pcpu_sw_netstats. Therefore factor out this code
to a new function dev_fetch_sw_netstats().

v2:
- constify argument netstats
- don't ignore netstats being NULL or an ERRPTR
- switch to EXPORT_SYMBOL_GPL

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/6d16a338-52f5-df69-0020-6bc771a7d498@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 948d7105e91a..964b494b0e8d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4499,6 +4499,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 					struct rtnl_link_stats64 *storage);
 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 			     const struct net_device_stats *netdev_stats);
+void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
+			   const struct pcpu_sw_netstats __percpu *netstats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
-- 
cgit v1.2.3


From d25e2e9388eda61b6e298585024ee3355f50c493 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Oct 2020 21:34:32 +0200
Subject: netfilter: restore NF_INET_NUMHOOKS

This definition is used by the iptables legacy UAPI, restore it.

Fixes: d3519cb89f6d ("netfilter: nf_tables: add inet ingress support")
Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netfilter/nf_tables.h | 4 +++-
 include/uapi/linux/netfilter.h    | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3965ce18226f..3f7e56b1171e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -14,6 +14,8 @@
 #include <net/netlink.h>
 #include <net/flow_offload.h>
 
+#define NFT_MAX_HOOKS	(NF_INET_INGRESS + 1)
+
 struct module;
 
 #define NFT_JUMP_STACK_SIZE	16
@@ -979,7 +981,7 @@ struct nft_chain_type {
 	int				family;
 	struct module			*owner;
 	unsigned int			hook_mask;
-	nf_hookfn			*hooks[NF_MAX_HOOKS];
+	nf_hookfn			*hooks[NFT_MAX_HOOKS];
 	int				(*ops_register)(struct net *net, const struct nf_hook_ops *ops);
 	void				(*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
 };
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index 6a6179af0d7c..ef9a44286e23 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -45,8 +45,8 @@ enum nf_inet_hooks {
 	NF_INET_FORWARD,
 	NF_INET_LOCAL_OUT,
 	NF_INET_POST_ROUTING,
-	NF_INET_INGRESS,
-	NF_INET_NUMHOOKS
+	NF_INET_NUMHOOKS,
+	NF_INET_INGRESS = NF_INET_NUMHOOKS,
 };
 
 enum nf_dev_hooks {
-- 
cgit v1.2.3


From d086a1c65aabb5a4e1edc580ca583e2964c62b44 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Wed, 14 Oct 2020 11:56:42 +0300
Subject: net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info

The access of tcf_tunnel_info() produces the following splat, so fix it
by dereferencing the tcf_tunnel_key_params pointer with marker that
internal tcfa_liock is held.

 =============================
 WARNING: suspicious RCU usage
 5.9.0+ #1 Not tainted
 -----------------------------
 include/net/tc_act/tc_tunnel_key.h:59 suspicious rcu_dereference_protected() usage!
 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 1 lock held by tc/34839:
  #0: ffff88828572c2a0 (&p->tcfa_lock){+...}-{2:2}, at: tc_setup_flow_action+0xb3/0x48b5
 stack backtrace:
 CPU: 1 PID: 34839 Comm: tc Not tainted 5.9.0+ #1
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x9a/0xd0
  tc_setup_flow_action+0x14cb/0x48b5
  fl_hw_replace_filter+0x347/0x690 [cls_flower]
  fl_change+0x2bad/0x4875 [cls_flower]
  tc_new_tfilter+0xf6f/0x1ba0
  rtnetlink_rcv_msg+0x5f2/0x870
  netlink_rcv_skb+0x124/0x350
  netlink_unicast+0x433/0x700
  netlink_sendmsg+0x6f1/0xbd0
  sock_sendmsg+0xb0/0xe0
  ____sys_sendmsg+0x4fa/0x6d0
  ___sys_sendmsg+0x12e/0x1b0
  __sys_sendmsg+0xa4/0x120
  do_syscall_64+0x2d/0x40
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f1f8cd4fe57
 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
 RSP: 002b:00007ffdc1e193b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1f8cd4fe57
 RDX: 0000000000000000 RSI: 00007ffdc1e19420 RDI: 0000000000000003
 RBP: 000000005f85aafa R08: 0000000000000001 R09: 00007ffdc1e1936c
 R10: 000000000040522d R11: 0000000000000246 R12: 0000000000000001
 R13: 0000000000000000 R14: 00007ffdc1e1d6f0 R15: 0000000000482420

Fixes: 3ebaf6da0716 ("net: sched: Do not assume RTNL is held in tunnel key action helpers")
Fixes: 7a47281439ba ("net: sched: lock action when translating it to flow_action infra")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_tunnel_key.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index e1057b255f69..879fe8cff581 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -56,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_tunnel_key *t = to_tunnel_key(a);
-	struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+	struct tcf_tunnel_key_params *params;
+
+	params = rcu_dereference_protected(t->params,
+					   lockdep_is_held(&a->tcfa_lock));
 
 	return &params->tcft_enc_metadata->u.tun_info;
 #else
-- 
cgit v1.2.3


From 346e320cb2103edef709c4466a29140c4a8e527a Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 15 Oct 2020 18:39:27 +0200
Subject: netfilter: nftables: allow re-computing sctp CRC-32C in 'payload'
 statements

nftables payload statements are used to mangle SCTP headers, but they can
only replace the Internet Checksum. As a consequence, nftables rules that
mangle sport/dport/vtag in SCTP headers potentially generate packets that
are discarded by the receiver, unless the CRC-32C is "offloaded" (e.g the
rule mangles a skb having 'ip_summed' equal to 'CHECKSUM_PARTIAL'.

Fix this extending uAPI definitions and L4 checksum update function, in a
way that userspace programs (e.g. nft) can instruct the kernel to compute
CRC-32C in SCTP headers. Also ensure that LIBCRC32C is built if NF_TABLES
is 'y' or 'm' in the kernel build configuration.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 352ee51707a1..98272cb5f617 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -749,10 +749,12 @@ enum nft_payload_bases {
  *
  * @NFT_PAYLOAD_CSUM_NONE: no checksumming
  * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791)
+ * @NFT_PAYLOAD_CSUM_SCTP: CRC-32c, for use in SCTP header (RFC 3309)
  */
 enum nft_payload_csum_types {
 	NFT_PAYLOAD_CSUM_NONE,
 	NFT_PAYLOAD_CSUM_INET,
+	NFT_PAYLOAD_CSUM_SCTP,
 };
 
 enum nft_payload_csum_flags {
-- 
cgit v1.2.3