diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-01 21:06:29 +1100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-01 21:06:29 +1100 |
commit | cec03afcb62fbbb0eaf943f6349ade61b89d7d40 (patch) | |
tree | cc80c13e373337d1c1dee9dd7269173da1f7c079 /net/ipv4 | |
parent | 2da53b0134ad41b91556d2d2a322cc03487a1ab7 (diff) | |
parent | 4814bdbd590e835ecec2d5e505165ec1c19796b2 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (173 commits)
[NETNS]: Lookup in FIB semantic hashes taking into account the namespace.
[NETNS]: Add a namespace mark to fib_info.
[IPV4]: fib_sync_down rework.
[NETNS]: Process interface address manipulation routines in the namespace.
[IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.
[IPV4]: Fix memory leak on error path during FIB initialization.
[NETFILTER]: Ipv6-related xt_hashlimit compilation fix.
[NET_SCHED]: Add flow classifier
[NET_SCHED]: sch_sfq: make internal queues visible as classes
[NET_SCHED]: sch_sfq: add support for external classifiers
[NET_SCHED]: Constify struct tcf_ext_map
[BLUETOOTH]: Fix bugs in previous conn add/del workqueue changes.
[TCP]: Unexport sysctl_tcp_tso_win_divisor
[IPV4]: Make struct ipv4_devconf static.
[TR] net/802/tr.c: sysctl_tr_rif_timeout static
[XFRM]: Fix statistics.
[XFRM]: Remove unused exports.
[PKT_SCHED] sch_teql.c: Duplicate IFF_BROADCAST in FMASK, remove 2nd.
[BNX2]: Fix ASYM PAUSE advertisement for remote PHY.
[IPV4] route cache: Introduce rt_genid for smooth cache invalidation
...
Diffstat (limited to 'net/ipv4')
47 files changed, 1102 insertions, 748 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 24e2b7294bf8..19880b086e71 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -343,6 +343,7 @@ config INET_ESP tristate "IP: ESP transformation" select XFRM select CRYPTO + select CRYPTO_AEAD select CRYPTO_HMAC select CRYPTO_MD5 select CRYPTO_CBC diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index d76803a3dcae..9d4555ec0b59 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -300,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x) } -static struct xfrm_type ah_type = +static const struct xfrm_type ah_type = { .description = "AH4", .owner = THIS_MODULE, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5976c598cc4b..8e17f65f4002 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -558,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) */ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw) + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw) { struct sk_buff *skb; struct arphdr *arp; @@ -672,8 +673,8 @@ void arp_xmit(struct sk_buff *skb) */ void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw) + const unsigned char *dest_hw, const unsigned char *src_hw, + const unsigned char *target_hw) { struct sk_buff *skb; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 21f71bf912d5..f282b26f63eb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,7 +64,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> -struct ipv4_devconf ipv4_devconf = { +static struct ipv4_devconf ipv4_devconf = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, @@ -485,46 +485,41 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - int err = -EINVAL; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err < 0) goto errout; ifm = nlmsg_data(nlh); - if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) { - err = -EINVAL; + err = -EINVAL; + if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) goto errout; - } - dev = __dev_get_by_index(&init_net, ifm->ifa_index); - if (dev == NULL) { - err = -ENODEV; + dev = __dev_get_by_index(net, ifm->ifa_index); + err = -ENODEV; + if (dev == NULL) goto errout; - } in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) { - err = -ENOBUFS; + err = -ENOBUFS; + if (in_dev == NULL) goto errout; - } ifa = inet_alloc_ifa(); - if (ifa == NULL) { + if (ifa == NULL) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. */ - err = -ENOBUFS; goto errout; - } ipv4_devconf_setall(in_dev); in_dev_hold(in_dev); @@ -568,7 +563,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (net != &init_net) return -EINVAL; - ifa = rtm_to_ifaddr(nlh); + ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -1182,7 +1177,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1216,7 +1211,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; int err = -ENOBUFS; + struct net *net; + net = ifa->ifa_dev->dev->nd_net; skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; @@ -1228,10 +1225,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 28ea5c77ca23..258d17631b4b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,27 +1,118 @@ +#include <crypto/aead.h> +#include <crypto/authenc.h> #include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> #include <linux/scatterlist.h> -#include <linux/crypto.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> -#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/in6.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/udp.h> +struct esp_skb_cb { + struct xfrm_skb_cb xfrm; + void *tmp; +}; + +#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) + +/* + * Allocate an AEAD request structure with extra space for SG and IV. + * + * For alignment considerations the IV is placed at the front, followed + * by the request and finally the SG list. + * + * TODO: Use spare space in skb for this where possible. + */ +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +{ + unsigned int len; + + len = crypto_aead_ivsize(aead); + if (len) { + len += crypto_aead_alignmask(aead) & + ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + } + + len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len = ALIGN(len, __alignof__(struct scatterlist)); + + len += sizeof(struct scatterlist) * nfrags; + + return kmalloc(len, GFP_ATOMIC); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +{ + return crypto_aead_ivsize(aead) ? + PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; +} + +static inline struct aead_givcrypt_request *esp_tmp_givreq( + struct crypto_aead *aead, u8 *iv) +{ + struct aead_givcrypt_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_givcrypt_set_tfm(req, aead); + return req; +} + +static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) +{ + struct aead_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_request_set_tfm(req, aead); + return req; +} + +static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, + struct aead_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static inline struct scatterlist *esp_givreq_sg( + struct crypto_aead *aead, struct aead_givcrypt_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static void esp_output_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + kfree(ESP_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); +} + static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; - struct crypto_blkcipher *tfm; - struct blkcipher_desc desc; + struct crypto_aead *aead; + struct aead_givcrypt_request *req; + struct scatterlist *sg; + struct scatterlist *asg; struct esp_data *esp; struct sk_buff *trailer; + void *tmp; + u8 *iv; u8 *tail; int blksize; int clen; @@ -36,18 +127,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) clen = skb->len; esp = x->data; - alen = esp->auth.icv_trunc_len; - tfm = esp->conf.tfm; - desc.tfm = tfm; - desc.flags = 0; - blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); + aead = esp->aead; + alen = crypto_aead_authsize(aead); + + blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(clen + 2, blksize); - if (esp->conf.padlen) - clen = ALIGN(clen, esp->conf.padlen); + if (esp->padlen) + clen = ALIGN(clen, esp->padlen); + + if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) + goto error; + nfrags = err; - if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) goto error; + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_givreq(aead, iv); + asg = esp_givreq_sg(aead, req); + sg = asg + 1; + /* Fill padding... */ tail = skb_tail_pointer(trailer); do { @@ -56,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) tail[i] = i + 1; } while (0); tail[clen - skb->len - 2] = (clen - skb->len) - 2; - pskb_put(skb, trailer, clen - skb->len); + tail[clen - skb->len - 1] = *skb_mac_header(skb); + pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); esph = ip_esp_hdr(skb); - *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_ESP; - spin_lock_bh(&x->lock); - /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; struct udphdr *uh; __be32 *udpdata32; + unsigned int sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); uh = (struct udphdr *)esph; - uh->source = encap->encap_sport; - uh->dest = encap->encap_dport; - uh->len = htons(skb->len + alen - skb_transport_offset(skb)); + uh->source = sport; + uh->dest = dport; + uh->len = htons(skb->len - skb_transport_offset(skb)); uh->check = 0; - switch (encap->encap_type) { + switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: esph = (struct ip_esp_hdr *)(uh + 1); @@ -95,131 +201,45 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); - if (esp->conf.ivlen) { - if (unlikely(!esp->conf.ivinitted)) { - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); - esp->conf.ivinitted = 1; - } - crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } - - do { - struct scatterlist *sg = &esp->sgbuf[0]; - - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) - goto unlock; - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - esph->enc_data + - esp->conf.ivlen - - skb->data, clen); - err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - } while (0); - - if (unlikely(err)) - goto unlock; - - if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); - crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + esph->enc_data + crypto_aead_ivsize(aead) - skb->data, + clen + alen); + sg_init_one(asg, esph, sizeof(*esph)); + + aead_givcrypt_set_callback(req, 0, esp_output_done, skb); + aead_givcrypt_set_crypt(req, sg, sg, clen, iv); + aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq); + + ESP_SKB_CB(skb)->tmp = tmp; + err = crypto_aead_givencrypt(req); + if (err == -EINPROGRESS) + goto error; - if (esp->auth.icv_full_len) { - err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, - sizeof(*esph) + esp->conf.ivlen + clen); - memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); - } + if (err == -EBUSY) + err = NET_XMIT_DROP; -unlock: - spin_unlock_bh(&x->lock); + kfree(tmp); error: return err; } -/* - * Note: detecting truncated vs. non-truncated authentication data is very - * expensive, so we only support truncated data, which is the recommended - * and common case. - */ -static int esp_input(struct xfrm_state *x, struct sk_buff *skb) +static int esp_input_done2(struct sk_buff *skb, int err) { struct iphdr *iph; - struct ip_esp_hdr *esph; + struct xfrm_state *x = xfrm_input_state(skb); struct esp_data *esp = x->data; - struct crypto_blkcipher *tfm = esp->conf.tfm; - struct blkcipher_desc desc = { .tfm = tfm }; - struct sk_buff *trailer; - int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); - int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; - int nfrags; + struct crypto_aead *aead = esp->aead; + int alen = crypto_aead_authsize(aead); + int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + int elen = skb->len - hlen; int ihl; u8 nexthdr[2]; - struct scatterlist *sg; int padlen; - int err = -EINVAL; - - if (!pskb_may_pull(skb, sizeof(*esph))) - goto out; - - if (elen <= 0 || (elen & (blksize-1))) - goto out; - - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) - goto out; - nfrags = err; - - skb->ip_summed = CHECKSUM_NONE; - - spin_lock(&x->lock); - - /* If integrity check is required, do this. */ - if (esp->auth.icv_full_len) { - u8 sum[alen]; - err = esp_mac_digest(esp, skb, 0, skb->len - alen); - if (err) - goto unlock; - - if (skb_copy_bits(skb, skb->len - alen, sum, alen)) - BUG(); - - if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - err = -EBADMSG; - goto unlock; - } - } - - esph = (struct ip_esp_hdr *)skb->data; - - /* Get ivec. This can be wrong, check against another impls. */ - if (esp->conf.ivlen) - crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); - - sg = &esp->sgbuf[0]; - - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - err = -ENOMEM; - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) - goto unlock; - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - sizeof(*esph) + esp->conf.ivlen, - elen); - err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - -unlock: - spin_unlock(&x->lock); + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; @@ -229,15 +249,11 @@ unlock: err = -EINVAL; padlen = nexthdr[0]; - if (padlen+2 >= elen) + if (padlen + 2 + alen >= elen) goto out; /* ... check padding bits here. Silly. :-) */ - /* RFC4303: Drop dummy packets without any error */ - if (nexthdr[1] == IPPROTO_NONE) - goto out; - iph = ip_hdr(skb); ihl = iph->ihl * 4; @@ -279,10 +295,87 @@ unlock: } pskb_trim(skb, skb->len - alen - padlen - 2); - __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); + __skb_pull(skb, hlen); skb_set_transport_header(skb, -ihl); - return nexthdr[1]; + err = nexthdr[1]; + + /* RFC4303: Drop dummy packets without any error */ + if (err == IPPROTO_NONE) + err = -EINVAL; + +out: + return err; +} + +static void esp_input_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + xfrm_input_resume(skb, esp_input_done2(skb, err)); +} + +/* + * Note: detecting truncated vs. non-truncated authentication data is very + * expensive, so we only support truncated data, which is the recommended + * and common case. + */ +static int esp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct esp_data *esp = x->data; + struct crypto_aead *aead = esp->aead; + struct aead_request *req; + struct sk_buff *trailer; + int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); + int nfrags; + void *tmp; + u8 *iv; + struct scatterlist *sg; + struct scatterlist *asg; + int err = -EINVAL; + + if (!pskb_may_pull(skb, sizeof(*esph))) + goto out; + + if (elen <= 0) + goto out; + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; + + err = -ENOMEM; + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) + goto out; + + ESP_SKB_CB(skb)->tmp = tmp; + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_req(aead, iv); + asg = esp_req_sg(aead, req); + sg = asg + 1; + + skb->ip_summed = CHECKSUM_NONE; + + esph = (struct ip_esp_hdr *)skb->data; + + /* Get ivec. This can be wrong, check against another impls. */ + iv = esph->enc_data; + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + sg_init_one(asg, esph, sizeof(*esph)); + + aead_request_set_callback(req, 0, esp_input_done, skb); + aead_request_set_crypt(req, sg, sg, elen, iv); + aead_request_set_assoc(req, asg, sizeof(*esph)); + + err = crypto_aead_decrypt(req); + if (err == -EINPROGRESS) + goto out; + + err = esp_input_done2(skb, err); out: return err; @@ -291,11 +384,11 @@ out: static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - u32 align = max_t(u32, blksize, esp->conf.padlen); + u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); + u32 align = max_t(u32, blksize, esp->padlen); u32 rem; - mtu -= x->props.header_len + esp->auth.icv_trunc_len; + mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); rem = mtu & (align - 1); mtu &= ~(align - 1); @@ -342,80 +435,143 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_blkcipher(esp->conf.tfm); - esp->conf.tfm = NULL; - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - crypto_free_hash(esp->auth.tfm); - esp->auth.tfm = NULL; - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; + crypto_free_aead(esp->aead); kfree(esp); } -static int esp_init_state(struct xfrm_state *x) +static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = NULL; - struct crypto_blkcipher *tfm; - u32 align; + struct esp_data *esp = x->data; + struct crypto_aead *aead; + int err; + + aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + err = crypto_aead_setkey(aead, x->aead->alg_key, + (x->aead->alg_key_len + 7) / 8); + if (err) + goto error; + + err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8); + if (err) + goto error; + +error: + return err; +} +static int esp_init_authenc(struct xfrm_state *x) +{ + struct esp_data *esp = x->data; + struct crypto_aead *aead; + struct crypto_authenc_key_param *param; + struct rtattr *rta; + char *key; + char *p; + char authenc_name[CRYPTO_MAX_ALG_NAME]; + unsigned int keylen; + int err; + + err = -EINVAL; if (x->ealg == NULL) goto error; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; + err = -ENAMETOOLONG; + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + + aead = crypto_alloc_aead(authenc_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); + err = -ENOMEM; + key = kmalloc(keylen, GFP_KERNEL); + if (!key) + goto error; + + p = key; + rta = (void *)p; + rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; + rta->rta_len = RTA_LENGTH(sizeof(*param)); + param = RTA_DATA(rta); + p += RTA_SPACE(sizeof(*param)); if (x->aalg) { struct xfrm_algo_desc *aalg_desc; - struct crypto_hash *hash; - hash = crypto_alloc_hash(x->aalg->alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - goto error; - - esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) - goto error; + memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8); + p += (x->aalg->alg_key_len + 7) / 8; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); + err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_hash_digestsize(hash)) { + crypto_aead_authsize(aead)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_hash_digestsize(hash), + crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + goto free_key; } - esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; - - esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); - if (!esp->auth.work_icv) - goto error; + err = crypto_aead_setauthsize( + aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + if (err) + goto free_key; } - tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - esp->conf.tfm = tfm; - esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); - esp->conf.padlen = 0; - if (esp->conf.ivlen) { - esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); - if (unlikely(esp->conf.ivec == NULL)) - goto error; - esp->conf.ivinitted = 0; - } - if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, - (x->ealg->alg_key_len + 7) / 8)) + param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); + memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); + + err = crypto_aead_setkey(aead, key, keylen); + +free_key: + kfree(key); + +error: + return err; +} + +static int esp_init_state(struct xfrm_state *x) +{ + struct esp_data *esp; + struct crypto_aead *aead; + u32 align; + int err; + + esp = kzalloc(sizeof(*esp), GFP_KERNEL); + if (esp == NULL) + return -ENOMEM; + + x->data = esp; + + if (x->aead) + err = esp_init_aead(x); + else + err = esp_init_authenc(x); + + if (err) goto error; - x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; + + aead = esp->aead; + + esp->padlen = 0; + + x->props.header_len = sizeof(struct ip_esp_hdr) + + crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); else if (x->props.mode == XFRM_MODE_BEET) @@ -434,21 +590,17 @@ static int esp_init_state(struct xfrm_state *x) break; } } - x->data = esp; - align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (esp->conf.padlen) - align = max_t(u32, align, esp->conf.padlen); - x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len; - return 0; + + align = ALIGN(crypto_aead_blocksize(aead), 4); + if (esp->padlen) + align = max_t(u32, align, esp->padlen); + x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); error: - x->data = esp; - esp_destroy(x); - x->data = NULL; - return -EINVAL; + return err; } -static struct xfrm_type esp_type = +static const struct xfrm_type esp_type = { .description = "ESP4", .owner = THIS_MODULE, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d28261826bc2..86ff2711fc95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. */ - if (fib_sync_down(ifa->ifa_local, NULL, 0)) + if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local)) fib_flush(dev->nd_net); } } @@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) + if (fib_sync_down_dev(dev, force)) fib_flush(dev->nd_net); rt_cache_flush(0); arp_ifdown(dev); @@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = { static int __net_init ip_fib_net_init(struct net *net) { + int err; unsigned int i; net->ipv4.fib_table_hash = kzalloc( @@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net) for (i = 0; i < FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); - return fib4_rules_init(net); + err = fib4_rules_init(net); + if (err < 0) + goto fail; + return 0; + +fail: + kfree(net->ipv4.fib_table_hash); + return err; } static void __net_exit ip_fib_net_exit(struct net *net) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index a15b2f1b2721..76b9c684cccd 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -424,19 +424,43 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) if (fa && fa->fa_tos == tos && fa->fa_info->fib_priority == fi->fib_priority) { - struct fib_alias *fa_orig; + struct fib_alias *fa_first, *fa_match; err = -EEXIST; if (cfg->fc_nlflags & NLM_F_EXCL) goto out; + /* We have 2 goals: + * 1. Find exact match for type, scope, fib_info to avoid + * duplicate routes + * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it + */ + fa_match = NULL; + fa_first = fa; + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { + if (fa->fa_tos != tos) + break; + if (fa->fa_info->fib_priority != fi->fib_priority) + break; + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && + fa->fa_info == fi) { + fa_match = fa; + break; + } + } + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; - if (fi->fib_treeref > 1) + fa = fa_first; + if (fa_match) { + if (fa == fa_match) + err = 0; goto out; - + } write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; @@ -459,20 +483,11 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) * uses the same scope, type, and nexthop * information. */ - fa_orig = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - if (fa->fa_tos != tos) - break; - if (fa->fa_info->fib_priority != fi->fib_priority) - break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && - fa->fa_info == fi) - goto out; - } + if (fa_match) + goto out; + if (!(cfg->fc_nlflags & NLM_F_APPEND)) - fa = fa_orig; + fa = fa_first; } err = -ENOENT; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7912866d987..a13c84763d4c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = &fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { + if (fi->fib_net != nfi->fib_net) + continue; if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && @@ -687,6 +689,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) struct fib_info *fi = NULL; struct fib_info *ofi; int nhs = 1; + struct net *net = cfg->fc_nlinfo.nl_net; /* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) @@ -727,6 +730,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; + fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; @@ -798,8 +802,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net, - fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -813,8 +816,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (fi->fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fi->fib_prefsrc != cfg->fc_dst) - if (inet_addr_type(cfg->fc_nlinfo.nl_net, - fi->fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -1031,70 +1033,74 @@ nla_put_failure: referring to it. - device went down -> we must shutdown all nexthops going via it. */ - -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down_addr(struct net *net, __be32 local) { int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; + unsigned int hash = fib_laddr_hashfn(local); + struct hlist_head *head = &fib_info_laddrhash[hash]; + struct hlist_node *node; + struct fib_info *fi; - if (local && fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; - struct fib_info *fi; + if (fib_info_laddrhash == NULL || local == 0) + return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi->fib_prefsrc == local) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } + hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi->fib_net != net) + continue; + if (fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; } } + return ret; +} - if (dev) { - struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; - struct fib_nh *nh; +int fib_sync_down_dev(struct net_device *dev, int force) +{ + int ret = 0; + int scope = RT_SCOPE_NOWHERE; + struct fib_info *prev_fi = NULL; + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_node *node; + struct fib_nh *nh; - hlist_for_each_entry(nh, node, head, nh_hash) { - struct fib_info *fi = nh->nh_parent; - int dead; + if (force) + scope = -1; - BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) - continue; - prev_fi = fi; - dead = 0; - change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) - dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - nh->nh_flags |= RTNH_F_DEAD; + hlist_for_each_entry(nh, node, head, nh_hash) { + struct fib_info *fi = nh->nh_parent; + int dead; + + BUG_ON(!fi->fib_nhs); + if (nh->nh_dev != dev || fi == prev_fi) + continue; + prev_fi = fi; + dead = 0; + change_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + dead++; + else if (nh->nh_dev == dev && + nh->nh_scope != scope) { + nh->nh_flags |= RTNH_F_DEAD; #ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; - spin_unlock_bh(&fib_multipath_lock); + spin_lock_bh(&fib_multipath_lock); + fi->fib_power -= nh->nh_power; + nh->nh_power = 0; + spin_unlock_bh(&fib_multipath_lock); #endif - dead++; - } + dead++; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nh->nh_dev == dev) { - dead = fi->fib_nhs; - break; - } -#endif - } endfor_nexthops(fi) - if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; + if (force > 1 && nh->nh_dev == dev) { + dead = fi->fib_nhs; + break; } +#endif + } endfor_nexthops(fi) + if (dead == fi->fib_nhs) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f2f47033f31f..35851c96bdfb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1205,20 +1205,45 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) * and we need to allocate a new one of those as well. */ - if (fa && fa->fa_info->fib_priority == fi->fib_priority) { - struct fib_alias *fa_orig; + if (fa && fa->fa_tos == tos && + fa->fa_info->fib_priority == fi->fib_priority) { + struct fib_alias *fa_first, *fa_match; err = -EEXIST; if (cfg->fc_nlflags & NLM_F_EXCL) goto out; + /* We have 2 goals: + * 1. Find exact match for type, scope, fib_info to avoid + * duplicate routes + * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it + */ + fa_match = NULL; + fa_first = fa; + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, fa_head, fa_list) { + if (fa->fa_tos != tos) + break; + if (fa->fa_info->fib_priority != fi->fib_priority) + break; + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && + fa->fa_info == fi) { + fa_match = fa; + break; + } + } + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; - if (fi->fib_treeref > 1) + fa = fa_first; + if (fa_match) { + if (fa == fa_match) + err = 0; goto out; - + } err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) @@ -1230,7 +1255,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_type = cfg->fc_type; new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; - new_fa->fa_state &= ~FA_S_ACCESSED; + new_fa->fa_state = state & ~FA_S_ACCESSED; list_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1247,20 +1272,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) * uses the same scope, type, and nexthop * information. */ - fa_orig = fa; - list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { - if (fa->fa_tos != tos) - break; - if (fa->fa_info->fib_priority != fi->fib_priority) - break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && - fa->fa_info == fi) - goto out; - } + if (fa_match) + goto out; if (!(cfg->fc_nlflags & NLM_F_APPEND)) - fa = fa_orig; + fa = fa_first; } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) @@ -1600,9 +1616,8 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; - fa_head = fa->fa_list.prev; - - list_for_each_entry(fa, fa_head, fa_list) { + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, fa_head, fa_list) { struct fib_info *fi = fa->fa_info; if (fa->fa_tos != tos) @@ -1743,6 +1758,19 @@ static struct leaf *trie_nextleaf(struct leaf *l) return leaf_walk_rcu(p, c); } +static struct leaf *trie_leafindex(struct trie *t, int index) +{ + struct leaf *l = trie_firstleaf(t); + + while (index-- > 0) { + l = trie_nextleaf(l); + if (!l) + break; + } + return l; +} + + /* * Caller must hold RTNL. */ @@ -1848,7 +1876,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_alias *fa; __be32 xkey = htonl(key); - s_i = cb->args[4]; + s_i = cb->args[5]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1869,12 +1897,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, plen, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; + cb->args[5] = i; return -1; } i++; } - cb->args[4] = i; + cb->args[5] = i; return skb->len; } @@ -1885,7 +1913,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct hlist_node *node; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1896,19 +1924,19 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, } if (i > s_i) - cb->args[4] = 0; + cb->args[5] = 0; if (list_empty(&li->falh)) continue; if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1918,35 +1946,37 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct leaf *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; + int count = cb->args[3]; rcu_read_lock(); /* Dump starting at last key. * Note: 0.0.0.0/0 (ie default) is first key. */ - if (!key) + if (count == 0) l = trie_firstleaf(t); else { + /* Normally, continue from last key, but if that is missing + * fallback to using slow rescan + */ l = fib_find_node(t, key); - if (!l) { - /* The table changed during the dump, rather than - * giving partial data, just make application retry. - */ - rcu_read_unlock(); - return -EBUSY; - } + if (!l) + l = trie_leafindex(t, count); } while (l) { cb->args[2] = l->key; if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + cb->args[3] = count; rcu_read_unlock(); return -1; } + ++count; l = trie_nextleaf(l); - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); } + cb->args[3] = count; rcu_read_unlock(); return skb->len; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7801cceb2d1b..de5a41de191a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, struct hlist_node *node; struct inet_bind_bucket *tb; int ret; + struct net *net = sk->sk_net; local_bh_disable(); if (!snum) { @@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) + if (tb->ib_net == net && tb->port == rover) goto next; break; next: @@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) + if (tb->ib_net == net && tb->port == snum) goto tb_found; } tb = NULL; @@ -147,7 +148,8 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, + net, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 605ed2cd7972..da97695e7096 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -259,20 +259,22 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct inet_diag_handler *handler; handler = inet_diag_lock_handler(nlh->nlmsg_type); - if (!handler) - return -ENOENT; + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto unlock; + } hashinfo = handler->idiag_hashinfo; err = -EINVAL; if (req->idiag_family == AF_INET) { - sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->idiag_family == AF_INET6) { - sk = inet6_lookup(hashinfo, + sk = inet6_lookup(&init_net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, @@ -708,8 +710,8 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct inet_hashinfo *hashinfo; handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); - if (!handler) - goto no_handler; + if (IS_ERR(handler)) + goto unlock; hashinfo = handler->idiag_hashinfo; @@ -838,7 +840,6 @@ done: cb->args[2] = num; unlock: inet_diag_unlock_handler(handler); -no_handler: return skb->len; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 619c63c6948a..48d45008f749 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -28,12 +28,14 @@ * The bindhash mutex for snum's hash chain must be held here. */ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, + struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { + tb->ib_net = net; tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -125,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, +static struct sock *inet_lookup_listener_slow(struct net *net, + const struct hlist_head *head, const __be32 daddr, const unsigned short hnum, const int dif) @@ -137,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, sk_for_each(sk, node, head) { const struct inet_sock *inet = inet_sk(sk); - if (inet->num == hnum && !ipv6_only_sock(sk)) { + if (sk->sk_net == net && inet->num == hnum && + !ipv6_only_sock(sk)) { const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; @@ -163,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, } /* Optimize the common listener case. */ -struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, +struct sock *__inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -178,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) + !sk->sk_bound_dev_if && sk->sk_net == net) goto sherry_cache; - sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); } if (sk) { sherry_cache: @@ -191,7 +196,8 @@ sherry_cache: } EXPORT_SYMBOL_GPL(__inet_lookup_listener); -struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, +struct sock * __inet_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif) @@ -210,13 +216,15 @@ struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, prefetch(head->chain.first); read_lock(lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &head->twchain) { - if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif)) goto hit; } sk = NULL; @@ -247,6 +255,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + struct net *net = sk->sk_net; prefetch(head->chain.first); write_lock(lock); @@ -255,7 +264,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, net, hash, acookie, + saddr, daddr, ports, dif)) { if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -266,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, net, hash, acookie, + saddr, daddr, ports, dif)) goto not_unique; } @@ -348,17 +359,18 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) } EXPORT_SYMBOL_GPL(__inet_hash); -/* - * Bind a port for a connect operation and hash it. - */ -int inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk) +int __inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **), + void (*hash)(struct inet_hashinfo *, struct sock *)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; + struct net *net = sk->sk_net; if (!snum) { int i, remaining, low, high, port; @@ -381,19 +393,19 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if (tb->ib_net == net && tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; - if (!__inet_check_established(death_row, - sk, port, - &tw)) + if (!check_established(death_row, sk, + port, &tw)) goto ok; goto next_port; } } - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -415,7 +427,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); - __inet_hash_nolisten(hinfo, sk); + hash(hinfo, sk); } spin_unlock(&head->lock); @@ -432,17 +444,28 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash_nolisten(hinfo, sk); + hash(hinfo, sk); spin_unlock_bh(&head->lock); return 0; } else { spin_unlock(&head->lock); /* No definite answer... Walk to established hash table */ - ret = __inet_check_established(death_row, sk, snum, NULL); + ret = check_established(death_row, sk, snum, NULL); out: local_bh_enable(); return ret; } } +EXPORT_SYMBOL_GPL(__inet_hash_connect); + +/* + * Bind a port for a connect operation and hash it. + */ +int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + return __inet_hash_connect(death_row, sk, + __inet_check_established, __inet_hash_nolisten); +} EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 18070ca65771..341779e685d9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -168,6 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, } skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; /* Send it out. */ return ip_local_out(skb); @@ -385,6 +386,7 @@ packet_routed: (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; return ip_local_out(skb); @@ -476,6 +478,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) if (skb_shinfo(skb)->frag_list) { struct sk_buff *frag; int first_len = skb_pagelen(skb); + int truesizes = 0; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -499,7 +502,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; - skb->truesize -= frag->truesize; + truesizes += frag->truesize; } } @@ -510,6 +513,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) frag = skb_shinfo(skb)->frag_list; skb_shinfo(skb)->frag_list = NULL; skb->data_len = first_len - skb_headlen(skb); + skb->truesize -= truesizes; skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); @@ -1284,6 +1288,7 @@ int ip_push_pending_frames(struct sock *sk) iph->daddr = rt->rt_dst; skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f4af99ad8fdb..ae1f45fc23b9 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -74,6 +74,7 @@ out: static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) { + int nexthdr; int err = -ENOMEM; struct ip_comp_hdr *ipch; @@ -84,13 +85,15 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) /* Remove ipcomp header and decompress original payload */ ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + skb->transport_header = skb->network_header + sizeof(*ipch); __skb_pull(skb, sizeof(*ipch)); err = ipcomp_decompress(x, skb); if (err) goto out; - err = ipch->nexthdr; + err = nexthdr; out: return err; @@ -434,7 +437,7 @@ error: goto out; } -static struct xfrm_type ipcomp_type = { +static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b4a810c28ac8..a7591ce344d2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -22,6 +22,7 @@ #include <linux/mutex.h> #include <linux/err.h> #include <net/compat.h> +#include <net/sock.h> #include <asm/uaccess.h> #include <linux/netfilter/x_tables.h> @@ -850,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; struct arpt_table *t; @@ -870,7 +871,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(NF_ARP); #endif - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -908,7 +909,8 @@ static int get_info(void __user *user, int *len, int compat) return ret; } -static int get_entries(struct arpt_get_entries __user *uptr, int *len) +static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, + int *len) { int ret; struct arpt_get_entries get; @@ -926,7 +928,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(NF_ARP, get.name); + t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -947,7 +949,8 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) return ret; } -static int __do_replace(const char *name, unsigned int valid_hooks, +static int __do_replace(struct net *net, const char *name, + unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) @@ -966,7 +969,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1019,7 +1022,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, return ret; } -static int do_replace(void __user *user, unsigned int len) +static int do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct arpt_replace tmp; @@ -1053,7 +1056,7 @@ static int do_replace(void __user *user, unsigned int len) duprintf("arp_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; @@ -1080,7 +1083,8 @@ static inline int add_counter_to_entry(struct arpt_entry *e, return 0; } -static int do_add_counters(void __user *user, unsigned int len, int compat) +static int do_add_counters(struct net *net, void __user *user, unsigned int len, + int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1132,7 +1136,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(NF_ARP, name); + t = xt_find_table_lock(net, NF_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1435,7 +1439,8 @@ struct compat_arpt_replace { struct compat_arpt_entry entries[0]; }; -static int compat_do_replace(void __user *user, unsigned int len) +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) { int ret; struct compat_arpt_replace tmp; @@ -1471,7 +1476,7 @@ static int compat_do_replace(void __user *user, unsigned int len) duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; @@ -1494,11 +1499,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1584,7 +1589,8 @@ struct compat_arpt_get_entries { struct compat_arpt_entry entrytable[0]; }; -static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, +static int compat_get_entries(struct net *net, + struct compat_arpt_get_entries __user *uptr, int *len) { int ret; @@ -1604,7 +1610,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, } xt_compat_lock(NF_ARP); - t = xt_find_table_lock(NF_ARP, get.name); + t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1641,10 +1647,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case ARPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_arpt_get_ctl(sk, cmd, user, len); @@ -1662,11 +1668,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -1686,11 +1692,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; case ARPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case ARPT_SO_GET_REVISION_TARGET: { @@ -1719,19 +1725,21 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -int arpt_register_table(struct arpt_table *table, - const struct arpt_replace *repl) +struct arpt_table *arpt_register_table(struct net *net, + struct arpt_table *table, + const struct arpt_replace *repl) { int ret; struct xt_table_info *newinfo; struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) { ret = -ENOMEM; - return ret; + goto out; } /* choose the copy on our node/cpu */ @@ -1745,24 +1753,27 @@ int arpt_register_table(struct arpt_table *table, repl->underflow); duprintf("arpt_register_table: translate table gives %d\n", ret); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; } + return new_table; - return 0; +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void arpt_unregister_table(struct arpt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; + struct module *table_owner = table->me; private = xt_unregister_table(table); @@ -1770,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table) loc_cpu_entry = private->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + if (private->number > private->initial_entries) + module_put(table_owner); xt_free_table_info(private); } @@ -1809,11 +1822,26 @@ static struct nf_sockopt_ops arpt_sockopts = { .owner = THIS_MODULE, }; +static int __net_init arp_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NF_ARP); +} + +static void __net_exit arp_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NF_ARP); +} + +static struct pernet_operations arp_tables_net_ops = { + .init = arp_tables_net_init, + .exit = arp_tables_net_exit, +}; + static int __init arp_tables_init(void) { int ret; - ret = xt_proto_init(NF_ARP); + ret = register_pernet_subsys(&arp_tables_net_ops); if (ret < 0) goto err1; @@ -1838,7 +1866,7 @@ err4: err3: xt_unregister_target(&arpt_standard_target); err2: - xt_proto_fini(NF_ARP); + unregister_pernet_subsys(&arp_tables_net_ops); err1: return ret; } @@ -1848,7 +1876,7 @@ static void __exit arp_tables_fini(void) nf_unregister_sockopt(&arpt_sockopts); xt_unregister_target(&arpt_error_target); xt_unregister_target(&arpt_standard_target); - xt_proto_fini(NF_ARP); + unregister_pernet_subsys(&arp_tables_net_ops); } EXPORT_SYMBOL(arpt_register_table); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 7201511d54d2..4e9c496a30c2 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -20,7 +20,7 @@ static struct struct arpt_replace repl; struct arpt_standard entries[3]; struct arpt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -61,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, &packet_filter); + return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { @@ -85,12 +85,31 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { }, }; +static int __net_init arptable_filter_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.arptable_filter = + arpt_register_table(net, &packet_filter, &initial_table.repl); + if (IS_ERR(net->ipv4.arptable_filter)) + return PTR_ERR(net->ipv4.arptable_filter); + return 0; +} + +static void __net_exit arptable_filter_net_exit(struct net *net) +{ + arpt_unregister_table(net->ipv4.arptable_filter); +} + +static struct pernet_operations arptable_filter_net_ops = { + .init = arptable_filter_net_init, + .exit = arptable_filter_net_exit, +}; + static int __init arptable_filter_init(void) { int ret; - /* Register table */ - ret = arpt_register_table(&packet_filter, &initial_table.repl); + ret = register_pernet_subsys(&arptable_filter_net_ops); if (ret < 0) return ret; @@ -100,14 +119,14 @@ static int __init arptable_filter_init(void) return ret; cleanup_table: - arpt_unregister_table(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); return ret; } static void __exit arptable_filter_fini(void) { nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - arpt_unregister_table(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); } module_init(arptable_filter_init); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5109839da222..6bda1102851b 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -512,6 +512,7 @@ static struct notifier_block ipq_nl_notifier = { .notifier_call = ipq_rcv_nl_event, }; +#ifdef CONFIG_SYSCTL static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { @@ -525,7 +526,9 @@ static ctl_table ipq_table[] = { }, { .ctl_name = 0 } }; +#endif +#ifdef CONFIG_PROC_FS static int ip_queue_show(struct seq_file *m, void *v) { read_lock_bh(&queue_lock); @@ -562,6 +565,7 @@ static const struct file_operations ip_queue_proc_fops = { .release = single_release, .owner = THIS_MODULE, }; +#endif static const struct nf_queue_handler nfqh = { .name = "ip_queue", @@ -571,7 +575,7 @@ static const struct nf_queue_handler nfqh = { static int __init ip_queue_init(void) { int status = -ENOMEM; - struct proc_dir_entry *proc; + struct proc_dir_entry *proc __maybe_unused; netlink_register_notifier(&ipq_nl_notifier); ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, @@ -581,6 +585,7 @@ static int __init ip_queue_init(void) goto cleanup_netlink_notifier; } +#ifdef CONFIG_PROC_FS proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); if (proc) { proc->owner = THIS_MODULE; @@ -589,10 +594,11 @@ static int __init ip_queue_init(void) printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } - +#endif register_netdevice_notifier(&ipq_dev_notifier); +#ifdef CONFIG_SYSCTL ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); - +#endif status = nf_register_queue_handler(PF_INET, &nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); @@ -601,10 +607,12 @@ static int __init ip_queue_init(void) return status; cleanup_sysctl: +#ifdef CONFIG_SYSCTL unregister_sysctl_table(ipq_sysctl_header); +#endif unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); -cleanup_ipqnl: +cleanup_ipqnl: __maybe_unused netlink_kernel_release(ipqnl); mutex_lock(&ipqnl_mutex); mutex_unlock(&ipqnl_mutex); @@ -620,7 +628,9 @@ static void __exit ip_queue_fini(void) synchronize_net(); ipq_flush(NULL, 0); +#ifdef CONFIG_SYSCTL unregister_sysctl_table(ipq_sysctl_header); +#endif unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 982b7f986291..600737f122d2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -291,7 +291,7 @@ static void trace_packet(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - char *tablename, + const char *tablename, struct xt_table_info *private, struct ipt_entry *e) { @@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1112,7 +1112,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; @@ -1152,7 +1152,7 @@ static int get_info(void __user *user, int *len, int compat) } static int -get_entries(struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; @@ -1170,7 +1170,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -1191,7 +1191,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) } static int -__do_replace(const char *name, unsigned int valid_hooks, +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { @@ -1208,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1261,7 +1261,7 @@ __do_replace(const char *name, unsigned int valid_hooks, } static int -do_replace(void __user *user, unsigned int len) +do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1295,7 +1295,7 @@ do_replace(void __user *user, unsigned int len) duprintf("ip_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; @@ -1331,7 +1331,7 @@ add_counter_to_entry(struct ipt_entry *e, } static int -do_add_counters(void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1383,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(AF_INET, name); + t = xt_find_table_lock(net, AF_INET, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1429,7 +1429,7 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, - compat_uint_t *size, struct xt_counters *counters, + unsigned int *size, struct xt_counters *counters, unsigned int *i) { struct ipt_entry_target *t; @@ -1476,7 +1476,7 @@ compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, int *i) + int *size, unsigned int *i) { struct xt_match *match; @@ -1534,7 +1534,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h, j; + unsigned int j; + int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 @@ -1647,7 +1648,8 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { - int j, ret; + unsigned int j; + int ret; j = 0; ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, @@ -1789,7 +1791,7 @@ out_unlock: } static int -compat_do_replace(void __user *user, unsigned int len) +compat_do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct compat_ipt_replace tmp; @@ -1826,7 +1828,7 @@ compat_do_replace(void __user *user, unsigned int len) duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; @@ -1850,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1903,7 +1905,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, } static int -compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, + int *len) { int ret; struct compat_ipt_get_entries get; @@ -1924,7 +1927,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) } xt_compat_lock(AF_INET); - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1960,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_ipt_get_ctl(sk, cmd, user, len); @@ -1982,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -2007,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; case IPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case IPT_SO_GET_REVISION_MATCH: @@ -2048,17 +2051,21 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, + const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) - return -ENOMEM; + if (!newinfo) { + ret = -ENOMEM; + goto out; + } /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; @@ -2069,30 +2076,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) repl->num_entries, repl->hook_entry, repl->underflow); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; } - return 0; + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void ipt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; + struct module *table_owner = table->me; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + if (private->number > private->initial_entries) + module_put(table_owner); xt_free_table_info(private); } @@ -2200,11 +2213,26 @@ static struct xt_match icmp_matchstruct __read_mostly = { .family = AF_INET, }; +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, AF_INET); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, AF_INET); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, +}; + static int __init ip_tables_init(void) { int ret; - ret = xt_proto_init(AF_INET); + ret = register_pernet_subsys(&ip_tables_net_ops); if (ret < 0) goto err1; @@ -2234,7 +2262,7 @@ err4: err3: xt_unregister_target(&ipt_standard_target); err2: - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); err1: return ret; } @@ -2247,7 +2275,7 @@ static void __exit ip_tables_fini(void) xt_unregister_target(&ipt_error_target); xt_unregister_target(&ipt_standard_target); - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1b31f7d14d46..c6cf84c77611 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -76,13 +76,6 @@ clusterip_config_put(struct clusterip_config *c) kfree(c); } -/* increase the count of entries(rules) using/referencing this config */ -static inline void -clusterip_config_entry_get(struct clusterip_config *c) -{ - atomic_inc(&c->entries); -} - /* decrease the count of entries using/referencing this config. If last * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index e3154a99c08a..68cbe3ca01ce 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -212,11 +212,11 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in, recent_entry_remove(t, e); ret = !ret; } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { - unsigned long t = jiffies - info->seconds * HZ; + unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; for (i = 0; i < e->nstamps; i++) { - if (info->seconds && time_after(t, e->stamps[i])) + if (info->seconds && time_after(time, e->stamps[i])) continue; if (++hits >= info->hit_count) { ret = !ret; @@ -320,6 +320,7 @@ struct recent_iter_state { }; static void *recent_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(recent_lock) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; @@ -352,6 +353,7 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void recent_seq_stop(struct seq_file *s, void *v) + __releases(recent_lock) { spin_unlock_bh(&recent_lock); } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 29bb4f9fbda0..69f3d7e6e96f 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -28,7 +28,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -69,7 +69,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); } static unsigned int @@ -88,7 +88,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { @@ -119,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { static int forward = NF_ACCEPT; module_param(forward, bool, 0000); +static int __net_init iptable_filter_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_filter = + ipt_register_table(net, &packet_filter, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_filter)) + return PTR_ERR(net->ipv4.iptable_filter); + return 0; +} + +static void __net_exit iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_filter); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; + static int __init iptable_filter_init(void) { int ret; @@ -131,8 +151,7 @@ static int __init iptable_filter_init(void) /* Entry 1 is the FORWARD hook */ initial_table.entries[1].target.verdict = -forward - 1; - /* Register table */ - ret = ipt_register_table(&packet_filter, &initial_table.repl); + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; @@ -144,14 +163,14 @@ static int __init iptable_filter_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); return ret; } static void __exit iptable_filter_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); } module_init(iptable_filter_init); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 5c4be202430c..c55a210853a7 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[5]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -80,7 +80,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_mangler); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); } static unsigned int @@ -112,7 +112,7 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, &packet_mangler); + ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -166,12 +166,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { }, }; +static int __net_init iptable_mangle_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_mangle = + ipt_register_table(net, &packet_mangler, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_mangle)) + return PTR_ERR(net->ipv4.iptable_mangle); + return 0; +} + +static void __net_exit iptable_mangle_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_mangle); +} + +static struct pernet_operations iptable_mangle_net_ops = { + .init = iptable_mangle_net_init, + .exit = iptable_mangle_net_exit, +}; + static int __init iptable_mangle_init(void) { int ret; - /* Register table */ - ret = ipt_register_table(&packet_mangler, &initial_table.repl); + ret = register_pernet_subsys(&iptable_mangle_net_ops); if (ret < 0) return ret; @@ -183,14 +202,14 @@ static int __init iptable_mangle_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); return ret; } static void __exit iptable_mangle_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); } module_init(iptable_mangle_init); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index dc34aa274533..e41fe8ca4e1c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -14,7 +14,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[2]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -52,7 +52,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); } static unsigned int @@ -70,7 +70,7 @@ ipt_local_hook(unsigned int hook, "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); } /* 'raw' is the very first table. */ @@ -91,12 +91,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { }, }; +static int __net_init iptable_raw_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_raw = + ipt_register_table(net, &packet_raw, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_raw)) + return PTR_ERR(net->ipv4.iptable_raw); + return 0; +} + +static void __net_exit iptable_raw_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_raw); +} + +static struct pernet_operations iptable_raw_net_ops = { + .init = iptable_raw_net_init, + .exit = iptable_raw_net_exit, +}; + static int __init iptable_raw_init(void) { int ret; - /* Register table */ - ret = ipt_register_table(&packet_raw, &initial_table.repl); + ret = register_pernet_subsys(&iptable_raw_net_ops); if (ret < 0) return ret; @@ -108,14 +127,14 @@ static int __init iptable_raw_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_raw); + unregister_pernet_subsys(&iptable_raw_net_ops); return ret; } static void __exit iptable_raw_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_raw); + unregister_pernet_subsys(&iptable_raw_net_ops); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ac3d61d8026e..a65b845c5f15 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -27,7 +27,8 @@ static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { - __be32 _addrs[2], *ap; + const __be32 *ap; + __be32 _addrs[2]; ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), sizeof(u_int32_t) * 2, _addrs); if (ap == NULL) @@ -76,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { - struct iphdr _iph, *iph; + const struct iphdr *iph; + struct iphdr _iph; iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (iph == NULL) @@ -111,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - struct nf_conn_help *help; - struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -299,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = { static int getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - struct inet_sock *inet = inet_sk(sk); - struct nf_conntrack_tuple_hash *h; + const struct inet_sock *inet = inet_sk(sk); + const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; NF_CT_TUPLE_U_BLANK(&tuple); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 543c02b74c96..089252e82c01 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -39,12 +39,14 @@ struct ct_iter_state { static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!hlist_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].first; + n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, { struct ct_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].first; + head = rcu_dereference(nf_conntrack_hash[st->bucket].first); } return head; } @@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) } static void *ct_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_get_idx(seq, *pos); } @@ -86,16 +89,17 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void ct_seq_stop(struct seq_file *s, void *v) + __releases(RCU) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } static int ct_seq_show(struct seq_file *s, void *v) { const struct nf_conntrack_tuple_hash *hash = v; const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; NF_CT_ASSERT(ct); @@ -191,10 +195,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { struct ct_expect_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) - return nf_ct_expect_hash[st->bucket].first; + n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -204,11 +210,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, { struct ct_expect_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = nf_ct_expect_hash[st->bucket].first; + head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); } return head; } @@ -224,8 +230,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) } static void *exp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_expect_get_idx(seq, *pos); } @@ -236,14 +243,15 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void exp_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *exp; - struct hlist_node *n = v; + const struct hlist_node *n = v; exp = hlist_entry(n, struct nf_conntrack_expect, hnode); @@ -324,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); - struct ip_conntrack_stat *st = v; + const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4004a04c5510..6873fddb3529 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -26,7 +26,8 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { - struct icmphdr _hdr, *hp; + const struct icmphdr *hp; + struct icmphdr _hdr; hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hp == NULL) @@ -100,7 +101,7 @@ static int icmp_packet(struct nf_conn *ct, } /* Called when a new connection for this protocol found. */ -static int icmp_new(struct nf_conn *conntrack, +static int icmp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { static const u_int8_t valid_new[] = { @@ -110,15 +111,15 @@ static int icmp_new(struct nf_conn *conntrack, [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ pr_debug("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); - NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + ct->tuplehash[0].tuple.dst.u.icmp.type); + NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); return 0; } - atomic_set(&conntrack->proto.icmp.count, 0); + atomic_set(&ct->proto.icmp.count, 0); return 1; } @@ -129,8 +130,8 @@ icmp_error_message(struct sk_buff *skb, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; - struct nf_conntrack_l4proto *innerproto; - struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_l4proto *innerproto; + const struct nf_conntrack_tuple_hash *h; NF_CT_ASSERT(skb->nfct == NULL); @@ -176,7 +177,8 @@ static int icmp_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) { - struct icmphdr _ih, *icmph; + const struct icmphdr *icmph; + struct icmphdr _ih; /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index e53ae1ef8f5e..dd07362d2b8f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -31,7 +31,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> -static DEFINE_RWLOCK(nf_nat_lock); +static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; @@ -154,8 +154,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, struct nf_conn *ct; struct hlist_node *n; - read_lock_bh(&nf_nat_lock); - hlist_for_each_entry(nat, n, &bysource[h], bysource) { + rcu_read_lock(); + hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -164,12 +164,12 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, result->dst = tuple->dst; if (in_range(result, range)) { - read_unlock_bh(&nf_nat_lock); + rcu_read_unlock(); return 1; } } } - read_unlock_bh(&nf_nat_lock); + rcu_read_unlock(); return 0; } @@ -330,12 +330,12 @@ nf_nat_setup_info(struct nf_conn *ct, unsigned int srchash; srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - write_lock_bh(&nf_nat_lock); + spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head(&nat->bysource, &bysource[srchash]); - write_unlock_bh(&nf_nat_lock); + hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + spin_unlock_bh(&nf_nat_lock); } /* It's done. */ @@ -521,14 +521,14 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) { int ret = 0; - write_lock_bh(&nf_nat_lock); + spin_lock_bh(&nf_nat_lock); if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { ret = -EBUSY; goto out; } rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); out: - write_unlock_bh(&nf_nat_lock); + spin_unlock_bh(&nf_nat_lock); return ret; } EXPORT_SYMBOL(nf_nat_protocol_register); @@ -536,10 +536,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { - write_lock_bh(&nf_nat_lock); + spin_lock_bh(&nf_nat_lock); rcu_assign_pointer(nf_nat_protos[proto->protonum], &nf_nat_unknown_protocol); - write_unlock_bh(&nf_nat_lock); + spin_unlock_bh(&nf_nat_lock); synchronize_rcu(); } EXPORT_SYMBOL(nf_nat_protocol_unregister); @@ -594,10 +594,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); - write_lock_bh(&nf_nat_lock); - hlist_del(&nat->bysource); + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); nat->ct = NULL; - write_unlock_bh(&nf_nat_lock); + spin_unlock_bh(&nf_nat_lock); } static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) @@ -609,10 +609,10 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) return; - write_lock_bh(&nf_nat_lock); + spin_lock_bh(&nf_nat_lock); hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); new_nat->ct = ct; - write_unlock_bh(&nf_nat_lock); + spin_unlock_bh(&nf_nat_lock); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -646,17 +646,13 @@ static int __init nf_nat_init(void) } /* Sew in builtin protocols. */ - write_lock_bh(&nf_nat_lock); + spin_lock_bh(&nf_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); - write_unlock_bh(&nf_nat_lock); - - for (i = 0; i < nf_nat_htable_size; i++) { - INIT_HLIST_HEAD(&bysource[i]); - } + spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a121989fdad7..ee47bf28c825 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -32,7 +32,8 @@ static int set_addr(struct sk_buff *skb, __be32 ip; __be16 port; } __attribute__ ((__packed__)) buf; - struct tcphdr _tcph, *th; + const struct tcphdr *th; + struct tcphdr _tcph; buf.ip = ip; buf.port = port; @@ -99,7 +100,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); int i; __be16 port; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 4c0232842e75..ca57f47bbd25 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -44,8 +44,7 @@ adjust_tcp_sequence(u32 seq, struct nf_nat_seq *this_way, *other_way; struct nf_conn_nat *nat = nfct_nat(ct); - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", - ntohl(seq), seq); + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq); dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index e63b944a2ebb..3a1e6d6afc0a 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_conn *master = ct->master; + const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; - struct nf_ct_pptp_master *ct_pptp_info; - struct nf_nat_pptp *nat_pptp_info; + const struct nf_ct_pptp_master *ct_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_range range; ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; @@ -186,7 +186,7 @@ static void pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_conntrack_expect *expect_reply) { - struct nf_conn *ct = expect_orig->master; + const struct nf_conn *ct = expect_orig->master; struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; @@ -217,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) { - struct nf_nat_pptp *nat_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; u_int16_t msg; __be16 new_pcid; unsigned int pcid_off; diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 9fa272e73113..a1e4da16da2e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -59,7 +59,7 @@ static int gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, - const struct nf_conn *conntrack) + const struct nf_conn *ct) { static u_int16_t key; __be16 *keyptr; @@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ - if (!conntrack->master) + if (!ct->master) return 0; if (maniptype == IP_NAT_MANIP_SRC) @@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, keyptr = &tuple->dst.u.gre.key; if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - pr_debug("%p: NATing GRE PPTP\n", conntrack); + pr_debug("%p: NATing GRE PPTP\n", ct); min = 1; range_size = 0xffff; } else { @@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); - if (!nf_nat_used_tuple(tuple, conntrack)) + if (!nf_nat_used_tuple(tuple, ct)) return 1; } - pr_debug("%p: no NAT mapping\n", conntrack); + pr_debug("%p: no NAT mapping\n", ct); return 0; } @@ -104,7 +104,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, { struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required @@ -148,12 +148,12 @@ static const struct nf_nat_protocol gre = { #endif }; -int __init nf_nat_proto_gre_init(void) +static int __init nf_nat_proto_gre_init(void) { return nf_nat_protocol_register(&gre); } -void __exit nf_nat_proto_gre_fini(void) +static void __exit nf_nat_proto_gre_fini(void) { nf_nat_protocol_unregister(&gre); } diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index a0e44c953cb6..03a02969aa57 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -57,7 +57,7 @@ icmp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index da23e9fbe679..ffd5d1589eca 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 10df4db078af..4b8f49910ff2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -91,7 +91,7 @@ udp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 519182269e76..f8fda57ba20b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -58,13 +58,14 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table nat_table = { +static struct xt_table __nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; +static struct xt_table *nat_table; /* Source NAT */ static unsigned int ipt_snat_target(struct sk_buff *skb, @@ -214,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb, { int ret; - ret = ipt_do_table(skb, hooknum, in, out, &nat_table); + ret = ipt_do_table(skb, hooknum, in, out, nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -248,9 +249,10 @@ int __init nf_nat_rule_init(void) { int ret; - ret = ipt_register_table(&nat_table, &nat_initial_table.repl); - if (ret != 0) - return ret; + nat_table = ipt_register_table(&init_net, &__nat_table, + &nat_initial_table.repl); + if (IS_ERR(nat_table)) + return PTR_ERR(nat_table); ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -264,7 +266,7 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(&nat_table); + ipt_unregister_table(nat_table); return ret; } @@ -273,5 +275,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(&nat_table); + ipt_unregister_table(nat_table); } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 606a170bf4ca..b4c8d4968bb2 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -35,9 +35,9 @@ struct addr_map { } addr[IP_CT_DIR_MAX]; }; -static void addr_map_init(struct nf_conn *ct, struct addr_map *map) +static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) { - struct nf_conntrack_tuple *t; + const struct nf_conntrack_tuple *t; enum ip_conntrack_dir dir; unsigned int n; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 07f2a49926d4..540ce6ae887c 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -260,7 +260,7 @@ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) { unsigned char ch; - if (eoc == 0) { + if (eoc == NULL) { if (!asn1_octet_decode(ctx, &ch)) return 0; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 1360a94766dd..b096e81500ae 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -24,7 +24,7 @@ static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) { - struct nf_conn *ct = exp->master; + const struct nf_conn *ct = exp->master; exp->saved_proto.udp.port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 85c08696abbe..a3002fe65b7f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -352,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb_reserve(skb, hh_len); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); skb_reset_network_header(skb); @@ -544,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct flowi fl = { .oif = ipc.oif, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -860,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &state->h->ht[state->bucket]) - if (sk->sk_net == state->p.net && - sk->sk_family == state->family) + if (sk->sk_net == state->p.net) goto found; } sk = NULL; @@ -877,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_net != state->p.net && - sk->sk_family != state->family); + } while (sk && sk->sk_net != state->p.net); if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { sk = sk_head(&state->h->ht[state->bucket]); @@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v) } EXPORT_SYMBOL_GPL(raw_seq_stop); -static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) +static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->daddr, @@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) __u16 destp = 0, srcp = inet->num; - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", i, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); - return tmpbuf; } -#define TMPSZ 128 - static int raw_seq_show(struct seq_file *seq, void *v) { - char tmpbuf[TMPSZ+1]; - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-*s\n", TMPSZ-1, - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout " - "inode drops"); - else { - struct raw_iter_state *state = raw_seq_private(seq); - - seq_printf(seq, "%-*s\n", TMPSZ-1, - get_raw_sock(v, tmpbuf, state->bucket)); - } + seq_printf(seq, " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode drops\n"); + else + raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; } @@ -972,27 +962,25 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, - unsigned short family) +int raw_seq_open(struct inode *ino, struct file *file, + struct raw_hashinfo *h, const struct seq_operations *ops) { int err; struct raw_iter_state *i; - err = seq_open_net(ino, file, &raw_seq_ops, - sizeof(struct raw_iter_state)); + err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state)); if (err < 0) return err; i = raw_seq_private((struct seq_file *)file->private_data); i->h = h; - i->family = family; return 0; } EXPORT_SYMBOL_GPL(raw_seq_open); static int raw_v4_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET); + return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops); } static const struct file_operations raw_seq_fops = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 896c768e41a2..8842ecb9be48 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -117,8 +117,6 @@ #define RT_GC_TIMEOUT (300*HZ) -static int ip_rt_min_delay = 2 * HZ; -static int ip_rt_max_delay = 10 * HZ; static int ip_rt_max_size; static int ip_rt_gc_timeout = RT_GC_TIMEOUT; static int ip_rt_gc_interval = 60 * HZ; @@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ; static int ip_rt_min_pmtu = 512 + 20 + 20; static int ip_rt_min_advmss = 256; static int ip_rt_secret_interval = 10 * 60 * HZ; -static int ip_rt_flush_expected; -static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) -static struct timer_list rt_flush_timer; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); static struct timer_list rt_secret_timer; @@ -169,6 +164,7 @@ static struct dst_ops ipv4_dst_ops = { .update_pmtu = ip_rt_update_pmtu, .local_out = ip_local_out, .entry_size = sizeof(struct rtable), + .entries = ATOMIC_INIT(0), }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -259,19 +255,16 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; static unsigned int rt_hash_log; -static unsigned int rt_hash_rnd; +static atomic_t rt_genid; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static int rt_intern_hash(unsigned hash, struct rtable *rth, - struct rtable **res); - static unsigned int rt_hash_code(u32 daddr, u32 saddr) { - return (jhash_2words(daddr, saddr, rt_hash_rnd) - & rt_hash_mask); + return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) + & rt_hash_mask; } #define rt_hash(daddr, saddr, idx) \ @@ -281,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { int bucket; + int genid; }; -static struct rtable *rt_cache_get_first(struct seq_file *seq) +static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) { struct rtable *r = NULL; - struct rt_cache_iter_state *st = seq->private; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { rcu_read_lock_bh(); - r = rt_hash_table[st->bucket].chain; - if (r) - break; + r = rcu_dereference(rt_hash_table[st->bucket].chain); + while (r) { + if (r->rt_genid == st->genid) + return r; + r = rcu_dereference(r->u.dst.rt_next); + } rcu_read_unlock_bh(); } - return rcu_dereference(r); + return r; } -static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) +static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) { - struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); @@ -313,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) return rcu_dereference(r); } -static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) +static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) { - struct rtable *r = rt_cache_get_first(seq); + struct rtable *r = rt_cache_get_first(st); if (r) - while (pos && (r = rt_cache_get_next(seq, r))) + while (pos && (r = rt_cache_get_next(st, r))) { + if (r->rt_genid != st->genid) + continue; --pos; + } return pos ? NULL : r; } static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { - return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + struct rt_cache_iter_state *st = seq->private; + + if (*pos) + return rt_cache_get_idx(st, *pos - 1); + st->genid = atomic_read(&rt_genid); + return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct rtable *r = NULL; + struct rtable *r; + struct rt_cache_iter_state *st = seq->private; if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(seq); + r = rt_cache_get_first(st); else - r = rt_cache_get_next(seq, v); + r = rt_cache_get_next(st, v); ++*pos; return r; } @@ -708,6 +711,11 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + if (rth->rt_genid != atomic_read(&rt_genid)) { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + continue; + } if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { @@ -732,83 +740,45 @@ static void rt_check_expire(void) /* * rt_worker_func() is run in process context. - * If a whole flush was scheduled, it is done. - * Else, we call rt_check_expire() to scan part of the hash table + * we call rt_check_expire() to scan part of the hash table */ static void rt_worker_func(struct work_struct *work) { - if (ip_rt_flush_expected) { - ip_rt_flush_expected = 0; - rt_do_flush(1); - } else - rt_check_expire(); + rt_check_expire(); schedule_delayed_work(&expires_work, ip_rt_gc_interval); } -/* This can run from both BH and non-BH contexts, the latter - * in the case of a forced flush event. +/* + * Pertubation of rt_genid by a small quantity [1..256] + * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() + * many times (2^24) without giving recent rt_genid. + * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_run_flush(unsigned long process_context) +static void rt_cache_invalidate(void) { - rt_deadline = 0; - - get_random_bytes(&rt_hash_rnd, 4); + unsigned char shuffle; - rt_do_flush(process_context); + get_random_bytes(&shuffle, sizeof(shuffle)); + atomic_add(shuffle + 1U, &rt_genid); } -static DEFINE_SPINLOCK(rt_flush_lock); - +/* + * delay < 0 : invalidate cache (fast : entries will be deleted later) + * delay >= 0 : invalidate & flush cache (can be long) + */ void rt_cache_flush(int delay) { - unsigned long now = jiffies; - int user_mode = !in_softirq(); - - if (delay < 0) - delay = ip_rt_min_delay; - - spin_lock_bh(&rt_flush_lock); - - if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { - long tmo = (long)(rt_deadline - now); - - /* If flush timer is already running - and flush request is not immediate (delay > 0): - - if deadline is not achieved, prolongate timer to "delay", - otherwise fire it at deadline time. - */ - - if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) - tmo = 0; - - if (delay > tmo) - delay = tmo; - } - - if (delay <= 0) { - spin_unlock_bh(&rt_flush_lock); - rt_run_flush(user_mode); - return; - } - - if (rt_deadline == 0) - rt_deadline = now + ip_rt_max_delay; - - mod_timer(&rt_flush_timer, now+delay); - spin_unlock_bh(&rt_flush_lock); + rt_cache_invalidate(); + if (delay >= 0) + rt_do_flush(!in_softirq()); } /* - * We change rt_hash_rnd and ask next rt_worker_func() invocation - * to perform a flush in process context + * We change rt_genid and let gc do the cleanup */ static void rt_secret_rebuild(unsigned long dummy) { - get_random_bytes(&rt_hash_rnd, 4); - ip_rt_flush_expected = 1; - cancel_delayed_work(&expires_work); - schedule_delayed_work(&expires_work, HZ/10); + rt_cache_invalidate(); mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); } @@ -885,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (!rt_may_expire(rth, tmo, expire)) { + if (rth->rt_genid == atomic_read(&rt_genid) && + !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; continue; @@ -966,6 +937,11 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { + if (rth->rt_genid != atomic_read(&rt_genid)) { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + continue; + } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ *rthp = rth->u.dst.rt_next; @@ -1131,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) static void rt_del(unsigned hash, struct rtable *rt) { - struct rtable **rthp; + struct rtable **rthp, *aux; + rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); - for (rthp = &rt_hash_table[hash].chain; *rthp; - rthp = &(*rthp)->u.dst.rt_next) - if (*rthp == rt) { - *rthp = rt->u.dst.rt_next; - rt_free(rt); - break; + while ((aux = *rthp) != NULL) { + if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + *rthp = aux->u.dst.rt_next; + rt_free(aux); + continue; } + rthp = &aux->u.dst.rt_next; + } spin_unlock_bh(rt_hash_lock_addr(hash)); } @@ -1186,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || - rth->fl.iif != 0) { + rth->fl.iif != 0 || + rth->rt_genid != atomic_read(&rt_genid)) { rthp = &rth->u.dst.rt_next; continue; } @@ -1224,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - + rt->rt_genid = atomic_read(&rt_genid); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1445,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src == iph->saddr && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { unsigned short mtu = new_mtu; if (new_mtu < 68 || new_mtu >= old_mtu) { @@ -1680,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_type = RTN_MULTICAST; + rth->rt_genid = atomic_read(&rt_genid); rth->rt_flags = RTCF_MULTICAST; + rth->rt_type = RTN_MULTICAST; if (our) { rth->u.dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1820,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; + rth->rt_genid = atomic_read(&rt_genid); rt_set_nexthop(rth, res, itag); @@ -1980,6 +1962,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->rt_genid = atomic_read(&rt_genid); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2071,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif == 0 && rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2199,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->rt_genid = atomic_read(&rt_genid); RT_CACHE_STAT_INC(out_slow_tot); @@ -2471,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2498,6 +2484,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entry_size = sizeof(struct rtable), + .entries = ATOMIC_INIT(0), }; @@ -2525,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); + rt->rt_genid = atomic_read(&rt_genid); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2779,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (idx < s_idx) continue; + if (rt->rt_genid != atomic_read(&rt_genid)) + continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, @@ -2848,24 +2838,6 @@ ctl_table ipv4_route_table[] = { .strategy = &ipv4_sysctl_rtcache_flush_strategy, }, { - .ctl_name = NET_IPV4_ROUTE_MIN_DELAY, - .procname = "min_delay", - .data = &ip_rt_min_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV4_ROUTE_MAX_DELAY, - .procname = "max_delay", - .data = &ip_rt_max_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, @@ -3023,8 +2995,8 @@ int __init ip_rt_init(void) { int rc = 0; - rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7))); + atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); @@ -3057,7 +3029,6 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - setup_timer(&rt_flush_timer, rt_run_flush, 0); setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); /* All the timers, started at system startup tend diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 82cdf23837e3..88286f35d1e2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -185,7 +185,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam tcp_get_available_congestion_control(tbl.data, tbl.maxlen); ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); - if (ret == 0 && newval && newlen) + if (ret == 1 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fa2c85ca5bc3..19c449f62672 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2153,7 +2153,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; - if (tcp_is_fack(tp) || + if (tcp_is_fack(tp) || tcp_is_reno(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9aea88b8d4fc..77c1939a2b0d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); + sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, + iph->saddr, th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1503,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, + th->source, iph->daddr, th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1661,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, + th->source, iph->daddr, th->dest, inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1735,7 +1735,8 @@ do_time_wait: } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, + struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, + &tcp_hashinfo, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 89f0188885c7..ed750f9ceb07 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2564,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect); EXPORT_SYMBOL(tcp_make_synack); EXPORT_SYMBOL(tcp_simple_retransmit); EXPORT_SYMBOL(tcp_sync_mss); -EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor); EXPORT_SYMBOL(tcp_mtup_init); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2fb8d731026b..7ea1b67b6de1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -130,14 +130,14 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -static inline int __udp_lib_lport_inuse(__u16 num, +static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_hash == num) + if (sk->sk_net == net && sk->sk_hash == num) return 1; return 0; } @@ -159,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_head *head; struct sock *sk2; int error = 1; + struct net *net = sk->sk_net; write_lock_bh(&udp_hash_lock); @@ -198,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, /* 2nd pass: find hole in shortest hash chain */ rover = best; for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(rover, udptable)) + if (! __udp_lib_lport_inuse(net, rover, udptable)) goto gotit; rover += UDP_HTABLE_SIZE; if (rover > high) @@ -218,6 +219,7 @@ gotit: sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && + sk2->sk_net == net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -261,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -274,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + if (sk->sk_net == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); if (inet->rcv_saddr) { if (inet->rcv_saddr != daddr) @@ -361,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) int harderr; int err; - sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, - skb->dev->ifindex, udptable ); + sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -1185,8 +1188,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - inet_iif(skb), udptable); + sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); if (sk != NULL) { int ret = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3783e3ee56a4..10ed70491434 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -247,6 +247,7 @@ static struct dst_ops xfrm4_dst_ops = { .local_out = __ip_local_out, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), + .entries = ATOMIC_INIT(0), }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 326845195620..41f5982d2087 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x) { } -static struct xfrm_type ipip_type = { +static const struct xfrm_type ipip_type = { .description = "IPIP", .owner = THIS_MODULE, .proto = IPPROTO_IPIP, @@ -50,7 +50,7 @@ static struct xfrm_type ipip_type = { static int xfrm_tunnel_rcv(struct sk_buff *skb) { - return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); + return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); } static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) |