From 107a9f4dc9211c1f91703d1739d7fd22ac58b332 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:18:54 -0400 Subject: netfilter: Add nf_hook_state initializer function. This way we can consolidate where we setup new nf_hook_state objects, to make sure the entire thing is initialized. The only other place an nf_hook_object is instantiated is nf_queue, wherein a structure copy is used. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c480c43ad8f7..b8c88f3c85ff 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,6 +54,21 @@ struct nf_hook_state { int (*okfn)(struct sk_buff *); }; +static inline void nf_hook_state_init(struct nf_hook_state *p, + unsigned int hook, + int thresh, u_int8_t pf, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + p->hook = hook; + p->thresh = thresh; + p->pf = pf; + p->in = indev; + p->out = outdev; + p->okfn = okfn; +} + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state); @@ -142,15 +157,10 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sk_buff *), int thresh) { if (nf_hooks_active(pf, hook)) { - struct nf_hook_state state = { - .hook = hook, - .thresh = thresh, - .pf = pf, - .in = indev, - .out = outdev, - .okfn = okfn - }; + struct nf_hook_state state; + nf_hook_state_init(&state, hook, thresh, pf, + indev, outdev, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 1c984f8a5df085bcf35364a8a870bd4db4da4ed3 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:00 -0400 Subject: netfilter: Add socket pointer to nf_hook_state. It is currently always set to NULL, but nf_queue is adjusted to be prepared for it being set to a real socket by taking and releasing a reference to that socket when necessary. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b8c88f3c85ff..f8f58fab2402 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -45,12 +45,15 @@ struct sk_buff; struct nf_hook_ops; +struct sock; + struct nf_hook_state { unsigned int hook; int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; + struct sock *sk; int (*okfn)(struct sk_buff *); }; @@ -59,6 +62,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int thresh, u_int8_t pf, struct net_device *indev, struct net_device *outdev, + struct sock *sk, int (*okfn)(struct sk_buff *)) { p->hook = hook; @@ -66,6 +70,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->pf = pf; p->in = indev; p->out = outdev; + p->sk = sk; p->okfn = okfn; } @@ -160,7 +165,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, okfn); + indev, outdev, NULL, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 7026b1ddb6b8d4e6ee33dc2bd06c0ca8746fa7ab Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:04 -0400 Subject: netfilter: Pass socket pointer down through okfn(). On the output paths in particular, we have to sometimes deal with two socket contexts. First, and usually skb->sk, is the local socket that generated the frame. And second, is potentially the socket used to control a tunneling socket, such as one the encapsulates using UDP. We do not want to disassociate skb->sk when encapsulating in order to fix this, because that would break socket memory accounting. The most extreme case where this can cause huge problems is an AF_PACKET socket transmitting over a vxlan device. We hit code paths doing checks that assume they are dealing with an ipv4 socket, but are actually operating upon the AF_PACKET one. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++-- include/linux/netfilter.h | 62 ++++++++++++++++++++++------------------ include/linux/netfilter_bridge.h | 2 +- include/net/dn_neigh.h | 6 ++-- include/net/ip.h | 3 +- include/net/ip6_route.h | 3 +- include/net/ipv6.h | 2 +- include/net/xfrm.h | 8 +++--- 8 files changed, 58 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41bf58a2b936..45823db2efb0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2165,8 +2165,12 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sk_buff *newskb); -int dev_queue_xmit(struct sk_buff *skb); +int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return dev_queue_xmit_sk(skb->sk, skb); +} int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); @@ -2927,7 +2931,11 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); +static inline int netif_receive_skb(struct sk_buff *skb) +{ + return netif_receive_skb_sk(skb->sk, skb); +} gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f8f58fab2402..63560d0a8dfe 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; - int (*okfn)(struct sk_buff *); + int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -63,7 +63,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; p->thresh = thresh; @@ -156,26 +156,29 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *, struct sk_buff *), + int thresh) { if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, NULL, okfn); + indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -196,35 +199,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), int thresh) +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, + struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); + int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(skb); + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), bool cond) +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(skb); + ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -324,19 +328,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) -#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) +#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) { - return okfn(skb); + return okfn(sk, skb); } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..5fc0a0fe244b 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -30,7 +30,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index 0f26aa707e62..d0424269313f 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -18,11 +18,11 @@ struct dn_neigh { void dn_neigh_init(void); void dn_neigh_cleanup(void); -int dn_neigh_router_hello(struct sk_buff *skb); -int dn_neigh_endnode_hello(struct sk_buff *skb); +int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb); +int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb); void dn_neigh_pointopoint_hello(struct sk_buff *skb); int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n); -int dn_to_neigh_output(struct sk_buff *skb); +int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb); extern struct neigh_table dn_neigh_table; diff --git a/include/net/ip.h b/include/net/ip.h index 69cd9cb8400c..d14af7edd197 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -108,7 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index eda131d179d9..5e192068e6cb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -170,7 +170,8 @@ static inline bool ipv6_anycast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_ANYCAST; } -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 65142e6af440..b6ae959824ff 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -769,7 +769,7 @@ static inline u8 ip6_tclass(__be32 flowinfo) int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); -int ip6_rcv_finish(struct sk_buff *skb); +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb); /* * upper-layer output functions diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 461f83539493..36ac102c97c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -332,7 +332,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sk_buff *skb); + int (*output_finish)(struct sock *sk, struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1503,7 +1503,7 @@ int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_output_resume(struct sk_buff *skb, int err); -int xfrm_output(struct sk_buff *skb); +int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_header(struct sk_buff *skb); @@ -1524,7 +1524,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct sock *sk, struct sk_buff *skb); -int xfrm4_output_finish(struct sk_buff *skb); +int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1549,7 +1549,7 @@ __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct sock *sk, struct sk_buff *skb); -int xfrm6_output_finish(struct sk_buff *skb); +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); -- cgit v1.2.3 From 79b16aadea32cce077acbe9e229fcb58a7801687 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:09 -0400 Subject: udp_tunnel: Pass UDP socket down through udp_tunnel{, 6}_xmit_skb(). That was we can make sure the output path of ipv4/ipv6 operate on the UDP socket rather than whatever random thing happens to be in skb->sk. Based upon a patch by Jiri Pirko. Signed-off-by: David S. Miller Acked-by: Hannes Frederic Sowa --- include/net/ip6_tunnel.h | 5 +++-- include/net/ipv6.h | 1 + include/net/udp_tunnel.h | 5 +++-- include/net/vxlan.h | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 1668be5937e6..b8529aa1dae7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -73,13 +73,14 @@ __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); -static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, + struct net_device *dev) { struct net_device_stats *stats = &dev->stats; int pkt_len, err; pkt_len = skb->len; - err = ip6_local_out(skb); + err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b6ae959824ff..27470cd1d5f8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -827,6 +827,7 @@ int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); int __ip6_local_out(struct sk_buff *skb); +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct sk_buff *skb); /* diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1a20d33d56bc..c491c1221606 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -77,13 +77,14 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 756e4636bad8..0082b5d33d7d 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -145,7 +145,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, bool xnet, u32 vxflags); -- cgit v1.2.3