summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetr Machata <petrm@nvidia.com>2025-06-17 00:44:19 +0200
committerJakub Kicinski <kuba@kernel.org>2025-06-17 18:18:46 -0700
commitf8337efa4ff5a27e6c1d4e384166413eecd21a65 (patch)
treee2ca0b9e67801f92044a422714987e134a03d362
parent96e8f5a9fe2d91b9f9eb8b45cc13ce1ca6a8af82 (diff)
vxlan: Support MC routing in the underlay
Locally-generated MC packets have so far not been subject to MC routing. Instead an MC-enabled installation would maintain the MC routing tables, and separately from that the list of interfaces to send packets to as part of the VXLAN FDB and MDB. In a previous patch, a ip_mr_output() and ip6_mr_output() routines were added for IPv4 and IPv6. All locally generated MC traffic is now passed through these functions. For reasons of backward compatibility, an SKB (IPCB / IP6CB) flag guards the actual MC routing. This patch adds logic to set the flag, and the UAPI to enable the behavior. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org> Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/vxlan/vxlan_core.c22
-rw-r--r--include/net/vxlan.h5
-rw-r--r--include/uapi/linux/if_link.h1
3 files changed, 25 insertions, 3 deletions
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index b22f9866be8e..a6cc1de4d8b8 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
rcu_read_lock();
if (addr_family == AF_INET) {
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+ u16 ipcb_flags = 0;
struct rtable *rt;
__be16 df = 0;
__be32 saddr;
@@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+ if (flags & VXLAN_F_MC_ROUTE)
+ ipcb_flags |= IPSKB_MCROUTE;
+
if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
pkey->u.ipv4.dst, tos, ttl, df,
- src_port, dst_port, xnet, !udp_sum, 0);
+ src_port, dst_port, xnet, !udp_sum,
+ ipcb_flags);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
struct in6_addr saddr;
+ u16 ip6cb_flags = 0;
if (!ifindex)
ifindex = sock6->sock->sk->sk_bound_dev_if;
@@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+ if (flags & VXLAN_F_MC_ROUTE)
+ ip6cb_flags |= IP6SKB_MCROUTE;
+
if (!info) {
u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;
@@ -2587,7 +2596,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
&saddr, &pkey->u.ipv6.dst, tos, ttl,
pkey->label, src_port, dst_port, !udp_sum,
- 0);
+ ip6cb_flags);
#endif
}
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3402,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
[IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
[IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
+ [IFLA_VXLAN_MC_ROUTE] = NLA_POLICY_MAX(NLA_U8, 1),
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4315,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
return err;
}
+ if (data[IFLA_VXLAN_MC_ROUTE]) {
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE,
+ VXLAN_F_MC_ROUTE, changelink,
+ true, extack);
+ if (err)
+ return err;
+ }
+
if (tb[IFLA_MTU]) {
if (changelink) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index e2f7ca045d3e..0ee50785f4f1 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -332,6 +332,7 @@ struct vxlan_dev {
#define VXLAN_F_VNIFILTER 0x20000
#define VXLAN_F_MDB 0x40000
#define VXLAN_F_LOCALBYPASS 0x80000
+#define VXLAN_F_MC_ROUTE 0x100000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -353,7 +354,9 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_COLLECT_METADATA | \
VXLAN_F_VNIFILTER | \
- VXLAN_F_LOCALBYPASS)
+ VXLAN_F_LOCALBYPASS | \
+ VXLAN_F_MC_ROUTE | \
+ 0)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3ad2d5d98034..873c285996fe 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1398,6 +1398,7 @@ enum {
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
IFLA_VXLAN_RESERVED_BITS,
+ IFLA_VXLAN_MC_ROUTE,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)