diff options
Diffstat (limited to 'include/net')
28 files changed, 656 insertions, 85 deletions
diff --git a/include/net/devlink.h b/include/net/devlink.h index c9fbeb5b701f..6c51e864336a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -60,6 +61,7 @@ struct devlink_port { enum devlink_port_type desired_type; void *type_dev; struct devlink_port_attrs attrs; + struct delayed_work type_warn_dw; }; struct devlink_sb_pool_info { @@ -735,6 +737,14 @@ void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +void devlink_flash_update_begin_notify(struct devlink *devlink); +void devlink_flash_update_end_notify(struct devlink *devlink); +void devlink_flash_update_status_notify(struct devlink *devlink, + const char *status_msg, + const char *component, + unsigned long done, + unsigned long total); + #if IS_ENABLED(CONFIG_NET_DEVLINK) void devlink_compat_running_version(struct net_device *dev, diff --git a/include/net/dsa.h b/include/net/dsa.h index ba6dfff98196..1131d9fac20b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -18,6 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/platform_data/dsa.h> +#include <linux/phylink.h> #include <net/devlink.h> #include <net/switchdev.h> @@ -189,6 +190,7 @@ struct dsa_port { struct net_device *bridge_dev; struct devlink_port devlink_port; struct phylink *pl; + struct phylink_config pl_config; struct work_struct xmit_work; struct sk_buff_head xmit_queue; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b473df5b9512..eba8465e1d86 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -103,6 +103,7 @@ struct fib_rule_notifier_info { }; #define FRA_GENERIC_POLICY \ + [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 7c5a8d9a8d2a..797e19c2fc40 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -229,9 +229,8 @@ enum flow_dissector_key_id { }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) -#define FLOW_DISSECTOR_F_STOP_AT_L3 BIT(1) -#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(2) -#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(3) +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) struct flow_dissector_key { enum flow_dissector_key_id key_id; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a2df99f9b196..36fdb85c974d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -1,6 +1,7 @@ #ifndef _NET_FLOW_OFFLOAD_H #define _NET_FLOW_OFFLOAD_H +#include <linux/kernel.h> #include <net/flow_dissector.h> struct flow_match { diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 378904ee9129..e91b79ad4e4a 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -3,19 +3,24 @@ #define __NET_FRAG_H__ #include <linux/rhashtable-types.h> +#include <linux/completion.h> -struct netns_frags { +/* Per netns frag queues directory */ +struct fqdir { /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; + struct net *net; + bool dead; struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; + struct rcu_work destroy_rwork; }; /** @@ -24,11 +29,13 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction + * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), + INET_FRAG_HASH_DEAD = BIT(3), }; struct frag_v4_compare_key { @@ -64,7 +71,7 @@ struct frag_v6_compare_key { * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size - * @net: namespace that this frag belongs to + * @fqdir: pointer to struct fqdir * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { @@ -84,7 +91,7 @@ struct inet_frag_queue { int meat; __u8 flags; u16 max_size; - struct netns_frags *net; + struct fqdir *fqdir; struct rcu_head rcu; }; @@ -98,21 +105,19 @@ struct inet_frags { struct kmem_cache *frags_cachep; const char *frags_cache_name; struct rhashtable_params rhash_params; + refcount_t refcnt; + struct completion completion; }; int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) -{ - atomic_long_set(&nf->mem, 0); - return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); -} -void inet_frags_exit_net(struct netns_frags *nf); +int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); +void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); +struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ unsigned int inet_frag_rbtree_purge(struct rb_root *root); @@ -125,19 +130,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -static inline long frag_mem_limit(const struct netns_frags *nf) +static inline long frag_mem_limit(const struct fqdir *fqdir) { - return atomic_long_read(&nf->mem); + return atomic_long_read(&fqdir->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) +static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_sub(val, &nf->mem); + atomic_long_sub(val, &fqdir->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, long val) +static inline void add_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_add(val, &nf->mem); + atomic_long_add(val, &fqdir->mem); } /* RFC 3168 support : diff --git a/include/net/ip.h b/include/net/ip.h index 49c672c8cdae..6dbf88ea07f1 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,44 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); + +struct ip_fraglist_iter { + struct sk_buff *frag; + struct iphdr *iph; + int offset; + unsigned int hlen; +}; + +void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, + unsigned int hlen, struct ip_fraglist_iter *iter); +void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter); + +static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) +{ + struct sk_buff *skb = iter->frag; + + iter->frag = skb->next; + skb_mark_not_on_list(skb); + + return skb; +} + +struct ip_frag_state { + struct iphdr *iph; + unsigned int hlen; + unsigned int ll_rs; + unsigned int mtu; + unsigned int left; + int offset; + int ptr; + __be16 not_last_frag; +}; + +void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, + unsigned int mtu, struct ip_frag_state *state); +struct sk_buff *ip_frag_next(struct sk_buff *skb, + struct ip_frag_state *state); + void ip_send_check(struct iphdr *ip); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 855b352b660f..ac0427c096f3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -127,6 +127,9 @@ struct fib6_nh { #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif + + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { @@ -139,7 +142,10 @@ struct fib6_info { * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head fib6_siblings; + union { + struct list_head fib6_siblings; + struct list_head nh_list; + }; unsigned int fib6_nsiblings; refcount_t fib6_ref; @@ -152,22 +158,19 @@ struct fib6_info { struct rt6key fib6_src; struct rt6key fib6_prefsrc; - struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; - u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; - u8 exception_bucket_flushed:1, - should_flush:1, + u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, dst_host:1, fib6_destroying:1, - unused:2; + unused:3; - struct fib6_nh fib6_nh; struct rcu_head rcu; + struct nexthop *nh; + struct fib6_nh fib6_nh[0]; }; struct rt6_info { @@ -276,7 +279,7 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) @@ -437,16 +440,17 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_unlock(); } -static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) -{ - return f6i->fib6_nh.fib_nh_dev; -} - int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib6_info *rt, + struct netlink_ext_ack *extack); +void fib6_rt_update(struct net *net, struct fib6_info *rt, + struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); @@ -480,6 +484,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4790beaa86e0..7375a165fd98 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -27,6 +27,7 @@ struct route_info { #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> +#include <net/nexthop.h> #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 @@ -66,11 +67,14 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* fib entries using a nexthop object can not be coalesced into + * a multipath route + */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ - return !(f6i->fib6_flags & RTF_ADDRCONF) && - f6i->fib6_nh.fib_nh_gw_family; + return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && + f6i->fib6_nh->fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); @@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + struct fib6_nh *nha, *nhb; + + if (a->nh || b->nh) + return nexthop_cmp(a->nh, b->nh); + nha = a->fib6_nh; + nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index bbeff32fb6cb..4cdf8bc22efd 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -125,9 +125,12 @@ struct fib_nh { * This structure contains data shared by many of routes. */ +struct nexthop; + struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct list_head nh_list; struct net *fib_net; int fib_treeref; refcount_t fib_clntref; @@ -146,9 +149,10 @@ struct fib_info { #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; bool fib_nh_is_v6; + bool nh_updated; + struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].fib_nh_dev }; @@ -185,18 +189,14 @@ struct fib_result_nl { int err; }; -static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) -{ - return &fi->fib_nh[nhsel].nh_common; -} - #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 2 #endif -__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, + unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) @@ -227,6 +227,7 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); +void fib_info_notify_update(struct net *net, struct nl_info *info); void fib_notify(struct net *net, struct notifier_block *nb); struct fib_table { @@ -425,11 +426,14 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); +void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif +int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, + struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); @@ -451,11 +455,18 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = nh->nh_tclassid << 16; + if (nhc->nhc_family == AF_INET) { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + *itag = nh->nh_tclassid << 16; + } else { + *itag = 0; + } + #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -465,6 +476,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) #endif } +void fib_flush(struct net *net); void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2ac40135b576..cb1ad0cc5c7b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -603,6 +603,7 @@ struct ip_vs_dest_user_kern { u16 tun_type; /* tunnel type */ __be16 tun_port; /* tunnel port */ + u16 tun_flags; /* tunnel flags */ }; @@ -665,6 +666,7 @@ struct ip_vs_dest { atomic_t last_weight; /* server latest weight */ __u16 tun_type; /* tunnel type */ __be16 tun_port; /* tunnel port */ + __u16 tun_flags; /* tunnel flags */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ @@ -1404,6 +1406,9 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, struct ip_vs_dest * ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); +struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, + const union nf_inet_addr *daddr, + __be16 tun_port); int ip_vs_use_count_inc(void); void ip_vs_use_count_dec(void); @@ -1497,6 +1502,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } #endif +#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \ + IP_VS_CONN_F_FWD_MASK) + /* ip_vs_fwd_tag returns the forwarding tag of the connection */ #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 60d9480bc4d1..b41f6a0fa903 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -150,6 +150,49 @@ struct frag_hdr { #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 +struct ip6_fraglist_iter { + struct ipv6hdr *tmp_hdr; + struct sk_buff *frag; + int offset; + unsigned int hlen; + __be32 frag_id; + u8 nexthdr; +}; + +int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, + u8 nexthdr, __be32 frag_id, + struct ip6_fraglist_iter *iter); +void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter); + +static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter) +{ + struct sk_buff *skb = iter->frag; + + iter->frag = skb->next; + skb_mark_not_on_list(skb); + + return skb; +} + +struct ip6_frag_state { + u8 *prevhdr; + unsigned int hlen; + unsigned int mtu; + unsigned int left; + int offset; + int ptr; + int hroom; + int troom; + __be32 frag_id; + u8 nexthdr; +}; + +void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, + unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, + u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state); +struct sk_buff *ip6_frag_next(struct sk_buff *skb, + struct ip6_frag_state *state); + #define IP6_REPLY_MARK(net, mark) \ ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 6c0c4fde16f8..5c93e942c50b 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -45,6 +45,11 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); + int (*ip6_del_rt)(struct net *net, struct fib6_info *rt); + void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, + struct nl_info *info); + void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 12689ddfc24c..abb4f92456e1 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -19,6 +19,7 @@ #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> +#include <net/netns/nexthop.h> #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> #include <net/netns/dccp.h> @@ -108,6 +109,7 @@ struct net { struct netns_mib mib; struct netns_packet packet; struct netns_unix unx; + struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d2bc733a2ef1..5cb19ce454d1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -49,6 +49,7 @@ union nf_conntrack_expect_proto { struct nf_conntrack_net { unsigned int users4; unsigned int users6; + unsigned int users_bridge; }; #include <linux/types.h> diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h new file mode 100644 index 000000000000..9a5514d5bc51 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bridge.h @@ -0,0 +1,20 @@ +#ifndef NF_CONNTRACK_BRIDGE_ +#define NF_CONNTRACK_BRIDGE_ + +struct nf_ct_bridge_info { + struct nf_hook_ops *ops; + unsigned int ops_size; + struct module *me; +}; + +void nf_ct_bridge_register(struct nf_ct_bridge_info *info); +void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info); + +struct nf_ct_bridge_frag_data { + char mac[ETH_HLEN]; + bool vlan_present; + u16 vlan_tci; + __be16 vlan_proto; +}; + +#endif diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index ae41e92251dd..de10faf2ce91 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) return ret; } +unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo); + void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 3e370cb36263..d8c187936bec 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,8 +53,6 @@ struct flow_offload_tuple { u8 l4proto; u8 dir; - int oifidx; - u16 mtu; struct dst_entry *dst_cache; diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 736aeac52f56..95406e1342cb 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan { struct netns_ieee802154_lowpan { struct netns_sysctl_lowpan sysctl; - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7698460a3dd1..c07cee1e0c9e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -72,7 +72,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct sock * __percpu *tcp_sk; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; struct xt_table *iptable_mangle; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5e61b5a8635d..022a0fd1a5a4 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -58,7 +58,7 @@ struct netns_ipv6 { struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; struct inet_peer_base *peers; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; @@ -116,7 +116,7 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h new file mode 100644 index 000000000000..c712ee5eebd9 --- /dev/null +++ b/include/net/netns/nexthop.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * nexthops in net namespaces + */ + +#ifndef __NETNS_NEXTHOP_H__ +#define __NETNS_NEXTHOP_H__ + +#include <linux/rbtree.h> + +struct netns_nexthop { + struct rb_root rb_root; /* tree of nexthops by id */ + struct hlist_head *devhash; /* nexthops by device */ + + unsigned int seq; /* protected by rtnl_mutex */ + u32 last_id_allocated; +}; +#endif diff --git a/include/net/nexthop.h b/include/net/nexthop.h new file mode 100644 index 000000000000..aff7b2410057 --- /dev/null +++ b/include/net/nexthop.h @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic nexthop implementation + * + * Copyright (c) 2017-19 Cumulus Networks + * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> + */ + +#ifndef __LINUX_NEXTHOP_H +#define __LINUX_NEXTHOP_H + +#include <linux/netdevice.h> +#include <linux/route.h> +#include <linux/types.h> +#include <net/ip_fib.h> +#include <net/ip6_fib.h> +#include <net/netlink.h> + +#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK + +struct nexthop; + +struct nh_config { + u32 nh_id; + + u8 nh_family; + u8 nh_protocol; + u8 nh_blackhole; + u32 nh_flags; + + int nh_ifindex; + struct net_device *dev; + + union { + __be32 ipv4; + struct in6_addr ipv6; + } gw; + + struct nlattr *nh_grp; + u16 nh_grp_type; + + struct nlattr *nh_encap; + u16 nh_encap_type; + + u32 nlflags; + struct nl_info nlinfo; +}; + +struct nh_info { + struct hlist_node dev_hash; /* entry on netns devhash */ + struct nexthop *nh_parent; + + u8 family; + bool reject_nh; + + union { + struct fib_nh_common fib_nhc; + struct fib_nh fib_nh; + struct fib6_nh fib6_nh; + }; +}; + +struct nh_grp_entry { + struct nexthop *nh; + u8 weight; + atomic_t upper_bound; + + struct list_head nh_list; + struct nexthop *nh_parent; /* nexthop of group with this entry */ +}; + +struct nh_group { + u16 num_nh; + bool mpath; + bool has_v4; + struct nh_grp_entry nh_entries[0]; +}; + +struct nexthop { + struct rb_node rb_node; /* entry on netns rbtree */ + struct list_head fi_list; /* v4 entries using nh */ + struct list_head f6i_list; /* v6 entries using nh */ + struct list_head grp_list; /* nh group entries using this nh */ + struct net *net; + + u32 id; + + u8 protocol; /* app managing this nh */ + u8 nh_flags; + bool is_group; + + refcount_t refcnt; + struct rcu_head rcu; + + union { + struct nh_info __rcu *nh_info; + struct nh_group __rcu *nh_grp; + }; +}; + +/* caller is holding rcu or rtnl; no reference taken to nexthop */ +struct nexthop *nexthop_find_by_id(struct net *net, u32 id); +void nexthop_free_rcu(struct rcu_head *head); + +static inline bool nexthop_get(struct nexthop *nh) +{ + return refcount_inc_not_zero(&nh->refcnt); +} + +static inline void nexthop_put(struct nexthop *nh) +{ + if (refcount_dec_and_test(&nh->refcnt)) + call_rcu(&nh->rcu, nexthop_free_rcu); +} + +static inline bool nexthop_cmp(const struct nexthop *nh1, + const struct nexthop *nh2) +{ + return nh1 == nh2; +} + +static inline bool nexthop_is_multipath(const struct nexthop *nh) +{ + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + return nh_grp->mpath; + } + return false; +} + +struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); + +static inline unsigned int nexthop_num_path(const struct nexthop *nh) +{ + unsigned int rc = 1; + + if (nexthop_is_multipath(nh)) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + rc = nh_grp->num_nh; + } else { + const struct nh_info *nhi; + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->reject_nh) + rc = 0; + } + + return rc; +} + +static inline +struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) +{ + const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); + + /* for_nexthops macros in fib_semantics.c grabs a pointer to + * the nexthop before checking nhsel + */ + if (nhsel > nhg->num_nh) + return NULL; + + return nhg->nh_entries[nhsel].nh; +} + +static inline +int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + int i; + + for (i = 0; i < nhg->num_nh; i++) { + struct nexthop *nhe = nhg->nh_entries[i].nh; + struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); + struct fib_nh_common *nhc = &nhi->fib_nhc; + int weight = nhg->nh_entries[i].weight; + + if (fib_add_nexthop(skb, nhc, weight) < 0) + return -EMSGSIZE; + } + + return 0; +} + +/* called with rcu lock */ +static inline bool nexthop_is_blackhole(const struct nexthop *nh) +{ + const struct nh_info *nhi; + + if (nexthop_is_multipath(nh)) { + if (nexthop_num_path(nh) > 1) + return false; + nh = nexthop_mpath_select(nh, 0); + if (!nh) + return false; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + return nhi->reject_nh; +} + +static inline void nexthop_path_fib_result(struct fib_result *res, int hash) +{ + struct nh_info *nhi; + struct nexthop *nh; + + nh = nexthop_select_path(res->fi->nh, hash); + nhi = rcu_dereference(nh->nh_info); + res->nhc = &nhi->fib_nhc; +} + +/* called with rcu read lock or rtnl held */ +static inline +struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) +{ + struct nh_info *nhi; + + BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); + BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, nhsel); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + return &nhi->fib_nhc; +} + +static inline unsigned int fib_info_num_path(const struct fib_info *fi) +{ + if (unlikely(fi->nh)) + return nexthop_num_path(fi->nh); + + return fi->fib_nhs; +} + +int fib_check_nexthop(struct nexthop *nh, u8 scope, + struct netlink_ext_ack *extack); + +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + if (unlikely(fi->nh)) + return nexthop_fib_nhc(fi->nh, nhsel); + + return &fi->fib_nh[nhsel].nh_common; +} + +/* only used when fib_nh is built into fib_info */ +static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) +{ + WARN_ON(fi->nh); + + return &fi->fib_nh[nhsel]; +} + +/* + * IPv6 variants + */ +int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, + struct netlink_ext_ack *extack); + +static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + +static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) +{ + struct fib6_nh *fib6_nh; + + fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; + return fib6_nh->fib_nh_dev; +} + +static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) +{ + struct nexthop *nh = res->f6i->nh; + struct nh_info *nhi; + + nh = nexthop_select_path(nh, hash); + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->reject_nh) { + res->fib6_type = RTN_BLACKHOLE; + res->fib6_flags |= RTF_REJECT; + res->nh = nexthop_fib6_nh(nh); + } else { + res->nh = &nhi->fib6_nh; + } +} +#endif diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 314699333bec..5a9bb09f32b6 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -43,19 +43,21 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, (__force __u32)csum2, len); } +static const struct skb_checksum_ops sctp_csum_ops = { + .update = sctp_csum_update, + .combine = sctp_csum_combine, +}; + static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); - const struct skb_checksum_ops ops = { - .update = sctp_csum_update, - .combine = sctp_csum_combine, - }; __le32 old = sh->checksum; __wsum new; sh->checksum = 0; - new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops); + new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, + &sctp_csum_ops); sh->checksum = old; return cpu_to_le32((__force __u32)new); diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h new file mode 100644 index 000000000000..d6a688571672 --- /dev/null +++ b/include/net/tc_act/tc_ctinfo.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_CTINFO_H +#define __NET_TC_CTINFO_H + +#include <net/act_api.h> + +struct tcf_ctinfo_params { + struct rcu_head rcu; + struct net *net; + u32 dscpmask; + u32 dscpstatemask; + u32 cpmarkmask; + u16 zone; + u8 mode; + u8 dscpmaskshift; +}; + +struct tcf_ctinfo { + struct tc_action common; + struct tcf_ctinfo_params __rcu *params; + u64 stats_dscp_set; + u64 stats_dscp_error; + u64 stats_cpmark_set; +}; + +#define to_ctinfo(a) ((struct tcf_ctinfo *)a) + +#endif /* __NET_TC_CTINFO_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ac2f53fbfa6b..204328b88412 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1610,7 +1610,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *key, unsigned int len); + void *primary_key, void *backup_key, + unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -1621,11 +1622,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH 16 +#define TCP_FASTOPEN_KEY_MAX 2 +#define TCP_FASTOPEN_KEY_BUF_LENGTH \ + (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX) /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher *tfm; - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; struct rcu_head rcu; }; @@ -1635,6 +1639,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); +/* Caller needs to wrap with rcu_read_(un)lock() */ +static inline +struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk) +{ + struct tcp_fastopen_context *ctx; + + ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); + if (!ctx) + ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + return ctx; +} + +static inline +bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc, + const struct tcp_fastopen_cookie *orig) +{ + if (orig->len == TCP_FASTOPEN_COOKIE_SIZE && + orig->len == foc->len && + !memcmp(orig->val, foc->val, foc->len)) + return true; + return false; +} + +static inline +int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx) +{ + if (ctx->tfm[1]) + return 2; + return 1; +} + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ diff --git a/include/net/tls.h b/include/net/tls.h index 4a55ce6a303f..3ecf45adb707 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include <linux/socket.h> #include <linux/tcp.h> #include <linux/skmsg.h> +#include <linux/netdevice.h> #include <net/tcp.h> #include <net/strparser.h> @@ -197,17 +198,16 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); - u8 driver_state[]; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *))) +#define TLS_DRIVER_STATE_SIZE_TX 16 }; #define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) enum tls_context_flags { TLS_RX_SYNC_RUNNING = 0, @@ -240,34 +240,32 @@ struct tls_prot_info { }; struct tls_context { + /* read-only cache line */ struct tls_prot_info prot_info; - union tls_crypto_context crypto_send; - union tls_crypto_context crypto_recv; + u8 tx_conf:3; + u8 rx_conf:3; - struct list_head list; - struct net_device *netdev; - refcount_t refcount; + int (*push_pending_record)(struct sock *sk, int flags); + void (*sk_write_space)(struct sock *sk); void *priv_ctx_tx; void *priv_ctx_rx; - u8 tx_conf:3; - u8 rx_conf:3; + struct net_device *netdev; + /* rw cache line */ struct cipher_context tx; struct cipher_context rx; struct scatterlist *partially_sent_record; u16 partially_sent_offset; - unsigned long flags; bool in_tcp_sendpages; bool pending_open_record_frags; + unsigned long flags; - int (*push_pending_record)(struct sock *sk, int flags); - - void (*sk_write_space)(struct sock *sk); + /* cache cold stuff */ void (*sk_destruct)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -279,6 +277,12 @@ struct tls_context { int __user *optlen); int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + + union tls_crypto_context crypto_send; + union tls_crypto_context crypto_recv; + + struct list_head list; + refcount_t refcount; }; enum tls_offload_ctx_dir { @@ -302,16 +306,16 @@ struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; atomic64_t resync_req; - u8 driver_state[]; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ +#define TLS_DRIVER_STATE_SIZE_RX 8 }; #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, @@ -446,19 +450,15 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) } static inline void tls_advance_record_sn(struct sock *sk, - struct cipher_context *ctx, - int version) + struct tls_prot_info *prot, + struct cipher_context *ctx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); - if (version != TLS_1_3_VERSION) { + if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, prot->iv_size); - } } static inline void tls_fill_prepend(struct tls_context *ctx, @@ -560,6 +560,23 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx) return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } +#if IS_ENABLED(CONFIG_TLS_DEVICE) +static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return tls_offload_ctx_tx(tls_ctx)->driver_state; + else + return tls_offload_ctx_rx(tls_ctx)->driver_state; +} + +static inline void * +tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) +{ + return __tls_driver_ctx(tls_get_ctx(sk), direction); +} +#endif + /* The TLS context is valid until sk_destruct is called */ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) { @@ -577,6 +594,7 @@ void tls_unregister_device(struct tls_device *device); int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); +struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); struct sk_buff *tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 83b5999a2587..dc1583a1fb8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -242,7 +242,7 @@ struct vxlan_dev { struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; - spinlock_t hash_lock; + spinlock_t hash_lock[FDB_HASH_SIZE]; unsigned int addrcnt; struct gro_cells gro_cells; |