summary | refs | log | tree | commit | diff
path: root/net/ipv4
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-02-01 21:06:29 +1100
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-02-01 21:06:29 +1100
commit cec03afcb62fbbb0eaf943f6349ade61b89d7d40 (patch)
tree cc80c13e373337d1c1dee9dd7269173da1f7c079 /net/ipv4
parent 2da53b0134ad41b91556d2d2a322cc03487a1ab7 (diff)
parent 4814bdbd590e835ecec2d5e505165ec1c19796b2 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (173 commits)
  [NETNS]: Lookup in FIB semantic hashes taking into account the namespace.
  [NETNS]: Add a namespace mark to fib_info.
  [IPV4]: fib_sync_down rework.
  [NETNS]: Process interface address manipulation routines in the namespace.
  [IPV4]: Small style cleanup of the error path in rtm_to_ifaddr.
  [IPV4]: Fix memory leak on error path during FIB initialization.
  [NETFILTER]: Ipv6-related xt_hashlimit compilation fix.
  [NET_SCHED]: Add flow classifier
  [NET_SCHED]: sch_sfq: make internal queues visible as classes
  [NET_SCHED]: sch_sfq: add support for external classifiers
  [NET_SCHED]: Constify struct tcf_ext_map
  [BLUETOOTH]: Fix bugs in previous conn add/del workqueue changes.
  [TCP]: Unexport sysctl_tcp_tso_win_divisor
  [IPV4]: Make struct ipv4_devconf static.
  [TR] net/802/tr.c: sysctl_tr_rif_timeout static
  [XFRM]: Fix statistics.
  [XFRM]: Remove unused exports.
  [PKT_SCHED] sch_teql.c: Duplicate IFF_BROADCAST in FMASK, remove 2nd.
  [BNX2]: Fix ASYM PAUSE advertisement for remote PHY.
  [IPV4] route cache: Introduce rt_genid for smooth cache invalidation
  ...
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Kconfig 1
-rw-r--r-- net/ipv4/ah4.c 2
-rw-r--r-- net/ipv4/arp.c 9
-rw-r--r-- net/ipv4/devinet.c 37
-rw-r--r-- net/ipv4/esp4.c 554
-rw-r--r-- net/ipv4/fib_frontend.c 14
-rw-r--r-- net/ipv4/fib_hash.c 47
-rw-r--r-- net/ipv4/fib_semantics.c 116
-rw-r--r-- net/ipv4/fib_trie.c 104
-rw-r--r-- net/ipv4/inet_connection_sock.c 8
-rw-r--r-- net/ipv4/inet_diag.c 15
-rw-r--r-- net/ipv4/inet_hashtables.c 69
-rw-r--r-- net/ipv4/ip_output.c 7
-rw-r--r-- net/ipv4/ipcomp.c 7
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 102
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c 31
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 18
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 112
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 7
-rw-r--r-- net/ipv4/netfilter/ipt_recent.c 6
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c 33
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c 33
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c 33
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 14
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 40
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 22
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 42
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c 5
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 3
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c 10
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c 16
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_icmp.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_tcp.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udp.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 16
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c 4
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_tftp.c 2
-rw-r--r-- net/ipv4/raw.c 42
-rw-r--r-- net/ipv4/route.c 211
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 2
-rw-r--r-- net/ipv4/tcp_input.c 2
-rw-r--r-- net/ipv4/tcp_ipv4.c 15
-rw-r--r-- net/ipv4/tcp_output.c 1
-rw-r--r-- net/ipv4/udp.c 25
-rw-r--r-- net/ipv4/xfrm4_policy.c 1
-rw-r--r-- net/ipv4/xfrm4_tunnel.c 4
47 files changed, 1102 insertions, 748 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 24e2b7294bf8..19880b086e71 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -343,6 +343,7 @@ config INET_ESP
tristate "IP: ESP transformation"
select XFRM
select CRYPTO
+ select CRYPTO_AEAD
select CRYPTO_HMAC
select CRYPTO_MD5
select CRYPTO_CBC
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index d76803a3dcae..9d4555ec0b59 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -300,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x)
}
-static struct xfrm_type ah_type =
+static const struct xfrm_type ah_type =
{
.description = "AH4",
.owner = THIS_MODULE,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 5976c598cc4b..8e17f65f4002 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -558,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
*/
struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct net_device *dev, __be32 src_ip,
- unsigned char *dest_hw, unsigned char *src_hw,
- unsigned char *target_hw)
+ const unsigned char *dest_hw,
+ const unsigned char *src_hw,
+ const unsigned char *target_hw)
{
struct sk_buff *skb;
struct arphdr *arp;
@@ -672,8 +673,8 @@ void arp_xmit(struct sk_buff *skb)
*/
void arp_send(int type, int ptype, __be32 dest_ip,
struct net_device *dev, __be32 src_ip,
- unsigned char *dest_hw, unsigned char *src_hw,
- unsigned char *target_hw)
+ const unsigned char *dest_hw, const unsigned char *src_hw,
+ const unsigned char *target_hw)
{
struct sk_buff *skb;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 21f71bf912d5..f282b26f63eb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,7 +64,7 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
-struct ipv4_devconf ipv4_devconf = {
+static struct ipv4_devconf ipv4_devconf = {
.data = {
[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
@@ -485,46 +485,41 @@ errout:
return err;
}
-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
{
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
- int err = -EINVAL;
+ int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err < 0)
goto errout;
ifm = nlmsg_data(nlh);
- if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
- err = -EINVAL;
+ err = -EINVAL;
+ if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
goto errout;
- }
- dev = __dev_get_by_index(&init_net, ifm->ifa_index);
- if (dev == NULL) {
- err = -ENODEV;
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ err = -ENODEV;
+ if (dev == NULL)
goto errout;
- }
in_dev = __in_dev_get_rtnl(dev);
- if (in_dev == NULL) {
- err = -ENOBUFS;
+ err = -ENOBUFS;
+ if (in_dev == NULL)
goto errout;
- }
ifa = inet_alloc_ifa();
- if (ifa == NULL) {
+ if (ifa == NULL)
/*
* A potential indev allocation can be left alive, it stays
* assigned to its device and is destroy with it.
*/
- err = -ENOBUFS;
goto errout;
- }
ipv4_devconf_setall(in_dev);
in_dev_hold(in_dev);
@@ -568,7 +563,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
if (net != &init_net)
return -EINVAL;
- ifa = rtm_to_ifaddr(nlh);
+ ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
@@ -1182,7 +1177,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
s_ip_idx = ip_idx = cb->args[1];
idx = 0;
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -1216,7 +1211,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
struct sk_buff *skb;
u32 seq = nlh ? nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
+ struct net *net;
+ net = ifa->ifa_dev->dev->nd_net;
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -1228,10 +1225,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 28ea5c77ca23..258d17631b4b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,27 +1,118 @@
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
#include <linux/err.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/esp.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
-#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/in6.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/udp.h>
+struct esp_skb_cb {
+ struct xfrm_skb_cb xfrm;
+ void *tmp;
+};
+
+#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * Allocate an AEAD request structure with extra space for SG and IV.
+ *
+ * For alignment considerations the IV is placed at the front, followed
+ * by the request and finally the SG list.
+ *
+ * TODO: Use spare space in skb for this where possible.
+ */
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
+{
+ unsigned int len;
+
+ len = crypto_aead_ivsize(aead);
+ if (len) {
+ len += crypto_aead_alignmask(aead) &
+ ~(crypto_tfm_ctx_alignment() - 1);
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+ }
+
+ len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+ len = ALIGN(len, __alignof__(struct scatterlist));
+
+ len += sizeof(struct scatterlist) * nfrags;
+
+ return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
+{
+ return crypto_aead_ivsize(aead) ?
+ PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
+}
+
+static inline struct aead_givcrypt_request *esp_tmp_givreq(
+ struct crypto_aead *aead, u8 *iv)
+{
+ struct aead_givcrypt_request *req;
+
+ req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+ crypto_tfm_ctx_alignment());
+ aead_givcrypt_set_tfm(req, aead);
+ return req;
+}
+
+static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
+{
+ struct aead_request *req;
+
+ req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+ crypto_tfm_ctx_alignment());
+ aead_request_set_tfm(req, aead);
+ return req;
+}
+
+static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
+ struct aead_request *req)
+{
+ return (void *)ALIGN((unsigned long)(req + 1) +
+ crypto_aead_reqsize(aead),
+ __alignof__(struct scatterlist));
+}
+
+static inline struct scatterlist *esp_givreq_sg(
+ struct crypto_aead *aead, struct aead_givcrypt_request *req)
+{
+ return (void *)ALIGN((unsigned long)(req + 1) +
+ crypto_aead_reqsize(aead),
+ __alignof__(struct scatterlist));
+}
+
+static void esp_output_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ kfree(ESP_SKB_CB(skb)->tmp);
+ xfrm_output_resume(skb, err);
+}
+
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
- struct crypto_blkcipher *tfm;
- struct blkcipher_desc desc;
+ struct crypto_aead *aead;
+ struct aead_givcrypt_request *req;
+ struct scatterlist *sg;
+ struct scatterlist *asg;
struct esp_data *esp;
struct sk_buff *trailer;
+ void *tmp;
+ u8 *iv;
u8 *tail;
int blksize;
int clen;
@@ -36,18 +127,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
clen = skb->len;
esp = x->data;
- alen = esp->auth.icv_trunc_len;
- tfm = esp->conf.tfm;
- desc.tfm = tfm;
- desc.flags = 0;
- blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
+ aead = esp->aead;
+ alen = crypto_aead_authsize(aead);
+
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(clen + 2, blksize);
- if (esp->conf.padlen)
- clen = ALIGN(clen, esp->conf.padlen);
+ if (esp->padlen)
+ clen = ALIGN(clen, esp->padlen);
+
+ if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+ goto error;
+ nfrags = err;
- if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
+ tmp = esp_alloc_tmp(aead, nfrags + 1);
+ if (!tmp)
goto error;
+ iv = esp_tmp_iv(aead, tmp);
+ req = esp_tmp_givreq(aead, iv);
+ asg = esp_givreq_sg(aead, req);
+ sg = asg + 1;
+
/* Fill padding... */
tail = skb_tail_pointer(trailer);
do {
@@ -56,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
tail[i] = i + 1;
} while (0);
tail[clen - skb->len - 2] = (clen - skb->len) - 2;
- pskb_put(skb, trailer, clen - skb->len);
+ tail[clen - skb->len - 1] = *skb_mac_header(skb);
+ pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
esph = ip_esp_hdr(skb);
- *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
- spin_lock_bh(&x->lock);
-
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh;
__be32 *udpdata32;
+ unsigned int sport, dport;
+ int encap_type;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ encap_type = encap->encap_type;
+ spin_unlock_bh(&x->lock);
uh = (struct udphdr *)esph;
- uh->source = encap->encap_sport;
- uh->dest = encap->encap_dport;
- uh->len = htons(skb->len + alen - skb_transport_offset(skb));
+ uh->source = sport;
+ uh->dest = dport;
+ uh->len = htons(skb->len - skb_transport_offset(skb));
uh->check = 0;
- switch (encap->encap_type) {
+ switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
esph = (struct ip_esp_hdr *)(uh + 1);
@@ -95,131 +201,45 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
- if (esp->conf.ivlen) {
- if (unlikely(!esp->conf.ivinitted)) {
- get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
- esp->conf.ivinitted = 1;
- }
- crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
- }
-
- do {
- struct scatterlist *sg = &esp->sgbuf[0];
-
- if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
- sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
- if (!sg)
- goto unlock;
- }
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- esph->enc_data +
- esp->conf.ivlen -
- skb->data, clen);
- err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
- if (unlikely(sg != &esp->sgbuf[0]))
- kfree(sg);
- } while (0);
-
- if (unlikely(err))
- goto unlock;
-
- if (esp->conf.ivlen) {
- memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
- crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
- }
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
+ clen + alen);
+ sg_init_one(asg, esph, sizeof(*esph));
+
+ aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
+ aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
+ aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
+ aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
+
+ ESP_SKB_CB(skb)->tmp = tmp;
+ err = crypto_aead_givencrypt(req);
+ if (err == -EINPROGRESS)
+ goto error;
- if (esp->auth.icv_full_len) {
- err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
- sizeof(*esph) + esp->conf.ivlen + clen);
- memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
- }
+ if (err == -EBUSY)
+ err = NET_XMIT_DROP;
-unlock:
- spin_unlock_bh(&x->lock);
+ kfree(tmp);
error:
return err;
}
-/*
- * Note: detecting truncated vs. non-truncated authentication data is very
- * expensive, so we only support truncated data, which is the recommended
- * and common case.
- */
-static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
+static int esp_input_done2(struct sk_buff *skb, int err)
{
struct iphdr *iph;
- struct ip_esp_hdr *esph;
+ struct xfrm_state *x = xfrm_input_state(skb);
struct esp_data *esp = x->data;
- struct crypto_blkcipher *tfm = esp->conf.tfm;
- struct blkcipher_desc desc = { .tfm = tfm };
- struct sk_buff *trailer;
- int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
- int alen = esp->auth.icv_trunc_len;
- int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen;
- int nfrags;
+ struct crypto_aead *aead = esp->aead;
+ int alen = crypto_aead_authsize(aead);
+ int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ int elen = skb->len - hlen;
int ihl;
u8 nexthdr[2];
- struct scatterlist *sg;
int padlen;
- int err = -EINVAL;
-
- if (!pskb_may_pull(skb, sizeof(*esph)))
- goto out;
-
- if (elen <= 0 || (elen & (blksize-1)))
- goto out;
-
- if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
- goto out;
- nfrags = err;
-
- skb->ip_summed = CHECKSUM_NONE;
-
- spin_lock(&x->lock);
-
- /* If integrity check is required, do this. */
- if (esp->auth.icv_full_len) {
- u8 sum[alen];
- err = esp_mac_digest(esp, skb, 0, skb->len - alen);
- if (err)
- goto unlock;
-
- if (skb_copy_bits(skb, skb->len - alen, sum, alen))
- BUG();
-
- if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
- err = -EBADMSG;
- goto unlock;
- }
- }
-
- esph = (struct ip_esp_hdr *)skb->data;
-
- /* Get ivec. This can be wrong, check against another impls. */
- if (esp->conf.ivlen)
- crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
-
- sg = &esp->sgbuf[0];
-
- if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
- err = -ENOMEM;
- sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
- if (!sg)
- goto unlock;
- }
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- sizeof(*esph) + esp->conf.ivlen,
- elen);
- err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
- if (unlikely(sg != &esp->sgbuf[0]))
- kfree(sg);
-
-unlock:
- spin_unlock(&x->lock);
+ kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
@@ -229,15 +249,11 @@ unlock:
err = -EINVAL;
padlen = nexthdr[0];
- if (padlen+2 >= elen)
+ if (padlen + 2 + alen >= elen)
goto out;
/* ... check padding bits here. Silly. :-) */
- /* RFC4303: Drop dummy packets without any error */
- if (nexthdr[1] == IPPROTO_NONE)
- goto out;
-
iph = ip_hdr(skb);
ihl = iph->ihl * 4;
@@ -279,10 +295,87 @@ unlock:
}
pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+ __skb_pull(skb, hlen);
skb_set_transport_header(skb, -ihl);
- return nexthdr[1];
+ err = nexthdr[1];
+
+ /* RFC4303: Drop dummy packets without any error */
+ if (err == IPPROTO_NONE)
+ err = -EINVAL;
+
+out:
+ return err;
+}
+
+static void esp_input_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ xfrm_input_resume(skb, esp_input_done2(skb, err));
+}
+
+/*
+ * Note: detecting truncated vs. non-truncated authentication data is very
+ * expensive, so we only support truncated data, which is the recommended
+ * and common case.
+ */
+static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead = esp->aead;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+ int nfrags;
+ void *tmp;
+ u8 *iv;
+ struct scatterlist *sg;
+ struct scatterlist *asg;
+ int err = -EINVAL;
+
+ if (!pskb_may_pull(skb, sizeof(*esph)))
+ goto out;
+
+ if (elen <= 0)
+ goto out;
+
+ if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ goto out;
+ nfrags = err;
+
+ err = -ENOMEM;
+ tmp = esp_alloc_tmp(aead, nfrags + 1);
+ if (!tmp)
+ goto out;
+
+ ESP_SKB_CB(skb)->tmp = tmp;
+ iv = esp_tmp_iv(aead, tmp);
+ req = esp_tmp_req(aead, iv);
+ asg = esp_req_sg(aead, req);
+ sg = asg + 1;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ esph = (struct ip_esp_hdr *)skb->data;
+
+ /* Get ivec. This can be wrong, check against another impls. */
+ iv = esph->enc_data;
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+ sg_init_one(asg, esph, sizeof(*esph));
+
+ aead_request_set_callback(req, 0, esp_input_done, skb);
+ aead_request_set_crypt(req, sg, sg, elen, iv);
+ aead_request_set_assoc(req, asg, sizeof(*esph));
+
+ err = crypto_aead_decrypt(req);
+ if (err == -EINPROGRESS)
+ goto out;
+
+ err = esp_input_done2(skb, err);
out:
return err;
@@ -291,11 +384,11 @@ out:
static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- u32 align = max_t(u32, blksize, esp->conf.padlen);
+ u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
+ u32 align = max_t(u32, blksize, esp->padlen);
u32 rem;
- mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
rem = mtu & (align - 1);
mtu &= ~(align - 1);
@@ -342,80 +435,143 @@ static void esp_destroy(struct xfrm_state *x)
if (!esp)
return;
- crypto_free_blkcipher(esp->conf.tfm);
- esp->conf.tfm = NULL;
- kfree(esp->conf.ivec);
- esp->conf.ivec = NULL;
- crypto_free_hash(esp->auth.tfm);
- esp->auth.tfm = NULL;
- kfree(esp->auth.work_icv);
- esp->auth.work_icv = NULL;
+ crypto_free_aead(esp->aead);
kfree(esp);
}
-static int esp_init_state(struct xfrm_state *x)
+static int esp_init_aead(struct xfrm_state *x)
{
- struct esp_data *esp = NULL;
- struct crypto_blkcipher *tfm;
- u32 align;
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead;
+ int err;
+
+ aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+ err = PTR_ERR(aead);
+ if (IS_ERR(aead))
+ goto error;
+
+ esp->aead = aead;
+
+ err = crypto_aead_setkey(aead, x->aead->alg_key,
+ (x->aead->alg_key_len + 7) / 8);
+ if (err)
+ goto error;
+
+ err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
+ if (err)
+ goto error;
+
+error:
+ return err;
+}
+static int esp_init_authenc(struct xfrm_state *x)
+{
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead;
+ struct crypto_authenc_key_param *param;
+ struct rtattr *rta;
+ char *key;
+ char *p;
+ char authenc_name[CRYPTO_MAX_ALG_NAME];
+ unsigned int keylen;
+ int err;
+
+ err = -EINVAL;
if (x->ealg == NULL)
goto error;
- esp = kzalloc(sizeof(*esp), GFP_KERNEL);
- if (esp == NULL)
- return -ENOMEM;
+ err = -ENAMETOOLONG;
+ if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
+ x->aalg ? x->aalg->alg_name : "digest_null",
+ x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+
+ aead = crypto_alloc_aead(authenc_name, 0, 0);
+ err = PTR_ERR(aead);
+ if (IS_ERR(aead))
+ goto error;
+
+ esp->aead = aead;
+
+ keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
+ (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
+ err = -ENOMEM;
+ key = kmalloc(keylen, GFP_KERNEL);
+ if (!key)
+ goto error;
+
+ p = key;
+ rta = (void *)p;
+ rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
+ rta->rta_len = RTA_LENGTH(sizeof(*param));
+ param = RTA_DATA(rta);
+ p += RTA_SPACE(sizeof(*param));
if (x->aalg) {
struct xfrm_algo_desc *aalg_desc;
- struct crypto_hash *hash;
- hash = crypto_alloc_hash(x->aalg->alg_name, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash))
- goto error;
-
- esp->auth.tfm = hash;
- if (crypto_hash_setkey(hash, x->aalg->alg_key,
- (x->aalg->alg_key_len + 7) / 8))
- goto error;
+ memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
+ p += (x->aalg->alg_key_len + 7) / 8;
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
+ err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
- crypto_hash_digestsize(hash)) {
+ crypto_aead_authsize(aead)) {
NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
x->aalg->alg_name,
- crypto_hash_digestsize(hash),
+ crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits/8);
- goto error;
+ goto free_key;
}
- esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
- esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
-
- esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
- if (!esp->auth.work_icv)
- goto error;
+ err = crypto_aead_setauthsize(
+ aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
+ if (err)
+ goto free_key;
}
- tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- goto error;
- esp->conf.tfm = tfm;
- esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
- esp->conf.padlen = 0;
- if (esp->conf.ivlen) {
- esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
- if (unlikely(esp->conf.ivec == NULL))
- goto error;
- esp->conf.ivinitted = 0;
- }
- if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key,
- (x->ealg->alg_key_len + 7) / 8))
+ param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
+ memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
+
+ err = crypto_aead_setkey(aead, key, keylen);
+
+free_key:
+ kfree(key);
+
+error:
+ return err;
+}
+
+static int esp_init_state(struct xfrm_state *x)
+{
+ struct esp_data *esp;
+ struct crypto_aead *aead;
+ u32 align;
+ int err;
+
+ esp = kzalloc(sizeof(*esp), GFP_KERNEL);
+ if (esp == NULL)
+ return -ENOMEM;
+
+ x->data = esp;
+
+ if (x->aead)
+ err = esp_init_aead(x);
+ else
+ err = esp_init_authenc(x);
+
+ if (err)
goto error;
- x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
+
+ aead = esp->aead;
+
+ esp->padlen = 0;
+
+ x->props.header_len = sizeof(struct ip_esp_hdr) +
+ crypto_aead_ivsize(aead);
if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
else if (x->props.mode == XFRM_MODE_BEET)
@@ -434,21 +590,17 @@ static int esp_init_state(struct xfrm_state *x)
break;
}
}
- x->data = esp;
- align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- if (esp->conf.padlen)
- align = max_t(u32, align, esp->conf.padlen);
- x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
- return 0;
+
+ align = ALIGN(crypto_aead_blocksize(aead), 4);
+ if (esp->padlen)
+ align = max_t(u32, align, esp->padlen);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
error:
- x->data = esp;
- esp_destroy(x);
- x->data = NULL;
- return -EINVAL;
+ return err;
}
-static struct xfrm_type esp_type =
+static const struct xfrm_type esp_type =
{
.description = "ESP4",
.owner = THIS_MODULE,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d28261826bc2..86ff2711fc95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
First of all, we scan fib_info list searching
for stray nexthop entries, then ignite fib_flush.
*/
- if (fib_sync_down(ifa->ifa_local, NULL, 0))
+ if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
fib_flush(dev->nd_net);
}
}
@@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net)
static void fib_disable_ip(struct net_device *dev, int force)
{
- if (fib_sync_down(0, dev, force))
+ if (fib_sync_down_dev(dev, force))
fib_flush(dev->nd_net);
rt_cache_flush(0);
arp_ifdown(dev);
@@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = {
static int __net_init ip_fib_net_init(struct net *net)
{
+ int err;
unsigned int i;
net->ipv4.fib_table_hash = kzalloc(
@@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net)
for (i = 0; i < FIB_TABLE_HASHSZ; i++)
INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
- return fib4_rules_init(net);
+ err = fib4_rules_init(net);
+ if (err < 0)
+ goto fail;
+ return 0;
+
+fail:
+ kfree(net->ipv4.fib_table_hash);
+ return err;
}
static void __net_exit ip_fib_net_exit(struct net *net)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a15b2f1b2721..76b9c684cccd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -424,19 +424,43 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
if (fa && fa->fa_tos == tos &&
fa->fa_info->fib_priority == fi->fib_priority) {
- struct fib_alias *fa_orig;
+ struct fib_alias *fa_first, *fa_match;
err = -EEXIST;
if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
+ /* We have 2 goals:
+ * 1. Find exact match for type, scope, fib_info to avoid
+ * duplicate routes
+ * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
+ */
+ fa_match = NULL;
+ fa_first = fa;
+ fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+ list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
+ if (fa->fa_tos != tos)
+ break;
+ if (fa->fa_info->fib_priority != fi->fib_priority)
+ break;
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
+ fa->fa_info == fi) {
+ fa_match = fa;
+ break;
+ }
+ }
+
if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
- if (fi->fib_treeref > 1)
+ fa = fa_first;
+ if (fa_match) {
+ if (fa == fa_match)
+ err = 0;
goto out;
-
+ }
write_lock_bh(&fib_hash_lock);
fi_drop = fa->fa_info;
fa->fa_info = fi;
@@ -459,20 +483,11 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
* uses the same scope, type, and nexthop
* information.
*/
- fa_orig = fa;
- fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
- list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
- if (fa->fa_tos != tos)
- break;
- if (fa->fa_info->fib_priority != fi->fib_priority)
- break;
- if (fa->fa_type == cfg->fc_type &&
- fa->fa_scope == cfg->fc_scope &&
- fa->fa_info == fi)
- goto out;
- }
+ if (fa_match)
+ goto out;
+
if (!(cfg->fc_nlflags & NLM_F_APPEND))
- fa = fa_orig;
+ fa = fa_first;
}
err = -ENOENT;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7912866d987..a13c84763d4c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
head = &fib_info_hash[hash];
hlist_for_each_entry(fi, node, head, fib_hash) {
+ if (fi->fib_net != nfi->fib_net)
+ continue;
if (fi->fib_nhs != nfi->fib_nhs)
continue;
if (nfi->fib_protocol == fi->fib_protocol &&
@@ -687,6 +689,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
struct fib_info *fi = NULL;
struct fib_info *ofi;
int nhs = 1;
+ struct net *net = cfg->fc_nlinfo.nl_net;
/* Fast check to catch the most weird cases */
if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
@@ -727,6 +730,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
fib_info_cnt++;
+ fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_flags = cfg->fc_flags;
fi->fib_priority = cfg->fc_priority;
@@ -798,8 +802,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (nhs != 1 || nh->nh_gw)
goto err_inval;
nh->nh_scope = RT_SCOPE_NOWHERE;
- nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
- fi->fib_nh->nh_oif);
+ nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
err = -ENODEV;
if (nh->nh_dev == NULL)
goto failure;
@@ -813,8 +816,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (fi->fib_prefsrc) {
if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
fi->fib_prefsrc != cfg->fc_dst)
- if (inet_addr_type(cfg->fc_nlinfo.nl_net,
- fi->fib_prefsrc) != RTN_LOCAL)
+ if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
@@ -1031,70 +1033,74 @@ nla_put_failure:
referring to it.
- device went down -> we must shutdown all nexthops going via it.
*/
-
-int fib_sync_down(__be32 local, struct net_device *dev, int force)
+int fib_sync_down_addr(struct net *net, __be32 local)
{
int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
-
- if (force)
- scope = -1;
+ unsigned int hash = fib_laddr_hashfn(local);
+ struct hlist_head *head = &fib_info_laddrhash[hash];
+ struct hlist_node *node;
+ struct fib_info *fi;
- if (local && fib_info_laddrhash) {
- unsigned int hash = fib_laddr_hashfn(local);
- struct hlist_head *head = &fib_info_laddrhash[hash];
- struct hlist_node *node;
- struct fib_info *fi;
+ if (fib_info_laddrhash == NULL || local == 0)
+ return 0;
- hlist_for_each_entry(fi, node, head, fib_lhash) {
- if (fi->fib_prefsrc == local) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- }
+ hlist_for_each_entry(fi, node, head, fib_lhash) {
+ if (fi->fib_net != net)
+ continue;
+ if (fi->fib_prefsrc == local) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ ret++;
}
}
+ return ret;
+}
- if (dev) {
- struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
- struct hlist_node *node;
- struct fib_nh *nh;
+int fib_sync_down_dev(struct net_device *dev, int force)
+{
+ int ret = 0;
+ int scope = RT_SCOPE_NOWHERE;
+ struct fib_info *prev_fi = NULL;
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_node *node;
+ struct fib_nh *nh;
- hlist_for_each_entry(nh, node, head, nh_hash) {
- struct fib_info *fi = nh->nh_parent;
- int dead;
+ if (force)
+ scope = -1;
- BUG_ON(!fi->fib_nhs);
- if (nh->nh_dev != dev || fi == prev_fi)
- continue;
- prev_fi = fi;
- dead = 0;
- change_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
- dead++;
- else if (nh->nh_dev == dev &&
- nh->nh_scope != scope) {
- nh->nh_flags |= RTNH_F_DEAD;
+ hlist_for_each_entry(nh, node, head, nh_hash) {
+ struct fib_info *fi = nh->nh_parent;
+ int dead;
+
+ BUG_ON(!fi->fib_nhs);
+ if (nh->nh_dev != dev || fi == prev_fi)
+ continue;
+ prev_fi = fi;
+ dead = 0;
+ change_nexthops(fi) {
+ if (nh->nh_flags&RTNH_F_DEAD)
+ dead++;
+ else if (nh->nh_dev == dev &&
+ nh->nh_scope != scope) {
+ nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- spin_lock_bh(&fib_multipath_lock);
- fi->fib_power -= nh->nh_power;
- nh->nh_power = 0;
- spin_unlock_bh(&fib_multipath_lock);
+ spin_lock_bh(&fib_multipath_lock);
+ fi->fib_power -= nh->nh_power;
+ nh->nh_power = 0;
+ spin_unlock_bh(&fib_multipath_lock);
#endif
- dead++;
- }
+ dead++;
+ }
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (force > 1 && nh->nh_dev == dev) {
- dead = fi->fib_nhs;
- break;
- }
-#endif
- } endfor_nexthops(fi)
- if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
+ if (force > 1 && nh->nh_dev == dev) {
+ dead = fi->fib_nhs;
+ break;
}
+#endif
+ } endfor_nexthops(fi)
+ if (dead == fi->fib_nhs) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ ret++;
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f2f47033f31f..35851c96bdfb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1205,20 +1205,45 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
* and we need to allocate a new one of those as well.
*/
- if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
- struct fib_alias *fa_orig;
+ if (fa && fa->fa_tos == tos &&
+ fa->fa_info->fib_priority == fi->fib_priority) {
+ struct fib_alias *fa_first, *fa_match;
err = -EEXIST;
if (cfg->fc_nlflags & NLM_F_EXCL)
goto out;
+ /* We have 2 goals:
+ * 1. Find exact match for type, scope, fib_info to avoid
+ * duplicate routes
+ * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
+ */
+ fa_match = NULL;
+ fa_first = fa;
+ fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+ list_for_each_entry_continue(fa, fa_head, fa_list) {
+ if (fa->fa_tos != tos)
+ break;
+ if (fa->fa_info->fib_priority != fi->fib_priority)
+ break;
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_scope == cfg->fc_scope &&
+ fa->fa_info == fi) {
+ fa_match = fa;
+ break;
+ }
+ }
+
if (cfg->fc_nlflags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
- if (fi->fib_treeref > 1)
+ fa = fa_first;
+ if (fa_match) {
+ if (fa == fa_match)
+ err = 0;
goto out;
-
+ }
err = -ENOBUFS;
new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
@@ -1230,7 +1255,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_type = cfg->fc_type;
new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
- new_fa->fa_state &= ~FA_S_ACCESSED;
+ new_fa->fa_state = state & ~FA_S_ACCESSED;
list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
alias_free_mem_rcu(fa);
@@ -1247,20 +1272,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
* uses the same scope, type, and nexthop
* information.
*/
- fa_orig = fa;
- list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
- if (fa->fa_tos != tos)
- break;
- if (fa->fa_info->fib_priority != fi->fib_priority)
- break;
- if (fa->fa_type == cfg->fc_type &&
- fa->fa_scope == cfg->fc_scope &&
- fa->fa_info == fi)
- goto out;
- }
+ if (fa_match)
+ goto out;
if (!(cfg->fc_nlflags & NLM_F_APPEND))
- fa = fa_orig;
+ fa = fa_first;
}
err = -ENOENT;
if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1600,9 +1616,8 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
fa_to_delete = NULL;
- fa_head = fa->fa_list.prev;
-
- list_for_each_entry(fa, fa_head, fa_list) {
+ fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+ list_for_each_entry_continue(fa, fa_head, fa_list) {
struct fib_info *fi = fa->fa_info;
if (fa->fa_tos != tos)
@@ -1743,6 +1758,19 @@ static struct leaf *trie_nextleaf(struct leaf *l)
return leaf_walk_rcu(p, c);
}
+static struct leaf *trie_leafindex(struct trie *t, int index)
+{
+ struct leaf *l = trie_firstleaf(t);
+
+ while (index-- > 0) {
+ l = trie_nextleaf(l);
+ if (!l)
+ break;
+ }
+ return l;
+}
+
+
/*
* Caller must hold RTNL.
*/
@@ -1848,7 +1876,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
struct fib_alias *fa;
__be32 xkey = htonl(key);
- s_i = cb->args[4];
+ s_i = cb->args[5];
i = 0;
/* rcu_read_lock is hold by caller */
@@ -1869,12 +1897,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
plen,
fa->fa_tos,
fa->fa_info, NLM_F_MULTI) < 0) {
- cb->args[4] = i;
+ cb->args[5] = i;
return -1;
}
i++;
}
- cb->args[4] = i;
+ cb->args[5] = i;
return skb->len;
}
@@ -1885,7 +1913,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
struct hlist_node *node;
int i, s_i;
- s_i = cb->args[3];
+ s_i = cb->args[4];
i = 0;
/* rcu_read_lock is hold by caller */
@@ -1896,19 +1924,19 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
}
if (i > s_i)
- cb->args[4] = 0;
+ cb->args[5] = 0;
if (list_empty(&li->falh))
continue;
if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
- cb->args[3] = i;
+ cb->args[4] = i;
return -1;
}
i++;
}
- cb->args[3] = i;
+ cb->args[4] = i;
return skb->len;
}
@@ -1918,35 +1946,37 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
struct leaf *l;
struct trie *t = (struct trie *) tb->tb_data;
t_key key = cb->args[2];
+ int count = cb->args[3];
rcu_read_lock();
/* Dump starting at last key.
* Note: 0.0.0.0/0 (ie default) is first key.
*/
- if (!key)
+ if (count == 0)
l = trie_firstleaf(t);
else {
+ /* Normally, continue from last key, but if that is missing
+ * fallback to using slow rescan
+ */
l = fib_find_node(t, key);
- if (!l) {
- /* The table changed during the dump, rather than
- * giving partial data, just make application retry.
- */
- rcu_read_unlock();
- return -EBUSY;
- }
+ if (!l)
+ l = trie_leafindex(t, count);
}
while (l) {
cb->args[2] = l->key;
if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+ cb->args[3] = count;
rcu_read_unlock();
return -1;
}
+ ++count;
l = trie_nextleaf(l);
- memset(&cb->args[3], 0,
- sizeof(cb->args) - 3*sizeof(cb->args[0]));
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
}
+ cb->args[3] = count;
rcu_read_unlock();
return skb->len;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7801cceb2d1b..de5a41de191a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
struct hlist_node *node;
struct inet_bind_bucket *tb;
int ret;
+ struct net *net = sk->sk_net;
local_bh_disable();
if (!snum) {
@@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
- if (tb->port == rover)
+ if (tb->ib_net == net && tb->port == rover)
goto next;
break;
next:
@@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
- if (tb->port == snum)
+ if (tb->ib_net == net && tb->port == snum)
goto tb_found;
}
tb = NULL;
@@ -147,7 +148,8 @@ tb_found:
}
tb_not_found:
ret = 1;
- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
+ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
+ net, head, snum)) == NULL)
goto fail_unlock;
if (hlist_empty(&tb->owners)) {
if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 605ed2cd7972..da97695e7096 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -259,20 +259,22 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
const struct inet_diag_handler *handler;
handler = inet_diag_lock_handler(nlh->nlmsg_type);
- if (!handler)
- return -ENOENT;
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto unlock;
+ }
hashinfo = handler->idiag_hashinfo;
err = -EINVAL;
if (req->idiag_family == AF_INET) {
- sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
+ sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
}
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
else if (req->idiag_family == AF_INET6) {
- sk = inet6_lookup(hashinfo,
+ sk = inet6_lookup(&init_net, hashinfo,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
@@ -708,8 +710,8 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct inet_hashinfo *hashinfo;
handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
- if (!handler)
- goto no_handler;
+ if (IS_ERR(handler))
+ goto unlock;
hashinfo = handler->idiag_hashinfo;
@@ -838,7 +840,6 @@ done:
cb->args[2] = num;
unlock:
inet_diag_unlock_handler(handler);
-no_handler:
return skb->len;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 619c63c6948a..48d45008f749 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,12 +28,14 @@
* The bindhash mutex for snum's hash chain must be held here.
*/
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
+ struct net *net,
struct inet_bind_hashbucket *head,
const unsigned short snum)
{
struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
if (tb != NULL) {
+ tb->ib_net = net;
tb->port = snum;
tb->fastreuse = 0;
INIT_HLIST_HEAD(&tb->owners);
@@ -125,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
* remote address for the connection. So always assume those are both
* wildcarded during the search since they can never be otherwise.
*/
-static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+static struct sock *inet_lookup_listener_slow(struct net *net,
+ const struct hlist_head *head,
const __be32 daddr,
const unsigned short hnum,
const int dif)
@@ -137,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
sk_for_each(sk, node, head) {
const struct inet_sock *inet = inet_sk(sk);
- if (inet->num == hnum && !ipv6_only_sock(sk)) {
+ if (sk->sk_net == net && inet->num == hnum &&
+ !ipv6_only_sock(sk)) {
const __be32 rcv_saddr = inet->rcv_saddr;
int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -163,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
}
/* Optimize the common listener case. */
-struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+struct sock *__inet_lookup_listener(struct net *net,
+ struct inet_hashinfo *hashinfo,
const __be32 daddr, const unsigned short hnum,
const int dif)
{
@@ -178,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
if (inet->num == hnum && !sk->sk_node.next &&
(!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
- !sk->sk_bound_dev_if)
+ !sk->sk_bound_dev_if && sk->sk_net == net)
goto sherry_cache;
- sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+ sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
}
if (sk) {
sherry_cache:
@@ -191,7 +196,8 @@ sherry_cache:
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
-struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
+struct sock * __inet_lookup_established(struct net *net,
+ struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
const int dif)
@@ -210,13 +216,15 @@ struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
prefetch(head->chain.first);
read_lock(lock);
sk_for_each(sk, node, &head->chain) {
- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+ if (INET_MATCH(sk, net, hash, acookie,
+ saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_for_each(sk, node, &head->twchain) {
- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+ if (INET_TW_MATCH(sk, net, hash, acookie,
+ saddr, daddr, ports, dif))
goto hit;
}
sk = NULL;
@@ -247,6 +255,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
+ struct net *net = sk->sk_net;
prefetch(head->chain.first);
write_lock(lock);
@@ -255,7 +264,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
sk_for_each(sk2, node, &head->twchain) {
tw = inet_twsk(sk2);
- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+ if (INET_TW_MATCH(sk2, net, hash, acookie,
+ saddr, daddr, ports, dif)) {
if (twsk_unique(sk, sk2, twp))
goto unique;
else
@@ -266,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
/* And established part... */
sk_for_each(sk2, node, &head->chain) {
- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+ if (INET_MATCH(sk2, net, hash, acookie,
+ saddr, daddr, ports, dif))
goto not_unique;
}
@@ -348,17 +359,18 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
}
EXPORT_SYMBOL_GPL(__inet_hash);
-/*
- * Bind a port for a connect operation and hash it.
- */
-int inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk)
+int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk,
+ int (*check_established)(struct inet_timewait_death_row *,
+ struct sock *, __u16, struct inet_timewait_sock **),
+ void (*hash)(struct inet_hashinfo *, struct sock *))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
const unsigned short snum = inet_sk(sk)->num;
struct inet_bind_hashbucket *head;
struct inet_bind_bucket *tb;
int ret;
+ struct net *net = sk->sk_net;
if (!snum) {
int i, remaining, low, high, port;
@@ -381,19 +393,19 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
* unique enough.
*/
inet_bind_bucket_for_each(tb, node, &head->chain) {
- if (tb->port == port) {
+ if (tb->ib_net == net && tb->port == port) {
BUG_TRAP(!hlist_empty(&tb->owners));
if (tb->fastreuse >= 0)
goto next_port;
- if (!__inet_check_established(death_row,
- sk, port,
- &tw))
+ if (!check_established(death_row, sk,
+ port, &tw))
goto ok;
goto next_port;
}
}
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port);
if (!tb) {
spin_unlock(&head->lock);
break;
@@ -415,7 +427,7 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->sport = htons(port);
- __inet_hash_nolisten(hinfo, sk);
+ hash(hinfo, sk);
}
spin_unlock(&head->lock);
@@ -432,17 +444,28 @@ ok:
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __inet_hash_nolisten(hinfo, sk);
+ hash(hinfo, sk);
spin_unlock_bh(&head->lock);
return 0;
} else {
spin_unlock(&head->lock);
/* No definite answer... Walk to established hash table */
- ret = __inet_check_established(death_row, sk, snum, NULL);
+ ret = check_established(death_row, sk, snum, NULL);
out:
local_bh_enable();
return ret;
}
}
+EXPORT_SYMBOL_GPL(__inet_hash_connect);
+
+/*
+ * Bind a port for a connect operation and hash it.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+{
+ return __inet_hash_connect(death_row, sk,
+ __inet_check_established, __inet_hash_nolisten);
+}
EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 18070ca65771..341779e685d9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -168,6 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
}
skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
/* Send it out. */
return ip_local_out(skb);
@@ -385,6 +386,7 @@ packet_routed:
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
return ip_local_out(skb);
@@ -476,6 +478,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *frag;
int first_len = skb_pagelen(skb);
+ int truesizes = 0;
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
@@ -499,7 +502,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
sock_hold(skb->sk);
frag->sk = skb->sk;
frag->destructor = sock_wfree;
- skb->truesize -= frag->truesize;
+ truesizes += frag->truesize;
}
}
@@ -510,6 +513,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
frag = skb_shinfo(skb)->frag_list;
skb_shinfo(skb)->frag_list = NULL;
skb->data_len = first_len - skb_headlen(skb);
+ skb->truesize -= truesizes;
skb->len = first_len;
iph->tot_len = htons(first_len);
iph->frag_off = htons(IP_MF);
@@ -1284,6 +1288,7 @@ int ip_push_pending_frames(struct sock *sk)
iph->daddr = rt->rt_dst;
skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
skb->dst = dst_clone(&rt->u.dst);
if (iph->protocol == IPPROTO_ICMP)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f4af99ad8fdb..ae1f45fc23b9 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -74,6 +74,7 @@ out:
static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ int nexthdr;
int err = -ENOMEM;
struct ip_comp_hdr *ipch;
@@ -84,13 +85,15 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
/* Remove ipcomp header and decompress original payload */
ipch = (void *)skb->data;
+ nexthdr = ipch->nexthdr;
+
skb->transport_header = skb->network_header + sizeof(*ipch);
__skb_pull(skb, sizeof(*ipch));
err = ipcomp_decompress(x, skb);
if (err)
goto out;
- err = ipch->nexthdr;
+ err = nexthdr;
out:
return err;
@@ -434,7 +437,7 @@ error:
goto out;
}
-static struct xfrm_type ipcomp_type = {
+static const struct xfrm_type ipcomp_type = {
.description = "IPCOMP4",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b4a810c28ac8..a7591ce344d2 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -22,6 +22,7 @@
#include <linux/mutex.h>
#include <linux/err.h>
#include <net/compat.h>
+#include <net/sock.h>
#include <asm/uaccess.h>
#include <linux/netfilter/x_tables.h>
@@ -850,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info,
}
#endif
-static int get_info(void __user *user, int *len, int compat)
+static int get_info(struct net *net, void __user *user, int *len, int compat)
{
char name[ARPT_TABLE_MAXNAMELEN];
struct arpt_table *t;
@@ -870,7 +871,7 @@ static int get_info(void __user *user, int *len, int compat)
if (compat)
xt_compat_lock(NF_ARP);
#endif
- t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
+ t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
"arptable_%s", name);
if (t && !IS_ERR(t)) {
struct arpt_getinfo info;
@@ -908,7 +909,8 @@ static int get_info(void __user *user, int *len, int compat)
return ret;
}
-static int get_entries(struct arpt_get_entries __user *uptr, int *len)
+static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
+ int *len)
{
int ret;
struct arpt_get_entries get;
@@ -926,7 +928,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len)
return -EINVAL;
}
- t = xt_find_table_lock(NF_ARP, get.name);
+ t = xt_find_table_lock(net, NF_ARP, get.name);
if (t && !IS_ERR(t)) {
struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n",
@@ -947,7 +949,8 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len)
return ret;
}
-static int __do_replace(const char *name, unsigned int valid_hooks,
+static int __do_replace(struct net *net, const char *name,
+ unsigned int valid_hooks,
struct xt_table_info *newinfo,
unsigned int num_counters,
void __user *counters_ptr)
@@ -966,7 +969,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
+ t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
"arptable_%s", name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1019,7 +1022,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks,
return ret;
}
-static int do_replace(void __user *user, unsigned int len)
+static int do_replace(struct net *net, void __user *user, unsigned int len)
{
int ret;
struct arpt_replace tmp;
@@ -1053,7 +1056,7 @@ static int do_replace(void __user *user, unsigned int len)
duprintf("arp_tables: Translated table\n");
- ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
goto free_newinfo_untrans;
@@ -1080,7 +1083,8 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
return 0;
}
-static int do_add_counters(void __user *user, unsigned int len, int compat)
+static int do_add_counters(struct net *net, void __user *user, unsigned int len,
+ int compat)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1132,7 +1136,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat)
goto free;
}
- t = xt_find_table_lock(NF_ARP, name);
+ t = xt_find_table_lock(net, NF_ARP, name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1435,7 +1439,8 @@ struct compat_arpt_replace {
struct compat_arpt_entry entries[0];
};
-static int compat_do_replace(void __user *user, unsigned int len)
+static int compat_do_replace(struct net *net, void __user *user,
+ unsigned int len)
{
int ret;
struct compat_arpt_replace tmp;
@@ -1471,7 +1476,7 @@ static int compat_do_replace(void __user *user, unsigned int len)
duprintf("compat_do_replace: Translated table\n");
- ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
goto free_newinfo_untrans;
@@ -1494,11 +1499,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = compat_do_replace(user, len);
+ ret = compat_do_replace(sk->sk_net, user, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len, 1);
+ ret = do_add_counters(sk->sk_net, user, len, 1);
break;
default:
@@ -1584,7 +1589,8 @@ struct compat_arpt_get_entries {
struct compat_arpt_entry entrytable[0];
};
-static int compat_get_entries(struct compat_arpt_get_entries __user *uptr,
+static int compat_get_entries(struct net *net,
+ struct compat_arpt_get_entries __user *uptr,
int *len)
{
int ret;
@@ -1604,7 +1610,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr,
}
xt_compat_lock(NF_ARP);
- t = xt_find_table_lock(NF_ARP, get.name);
+ t = xt_find_table_lock(net, NF_ARP, get.name);
if (t && !IS_ERR(t)) {
struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1641,10 +1647,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case ARPT_SO_GET_INFO:
- ret = get_info(user, len, 1);
+ ret = get_info(sk->sk_net, user, len, 1);
break;
case ARPT_SO_GET_ENTRIES:
- ret = compat_get_entries(user, len);
+ ret = compat_get_entries(sk->sk_net, user, len);
break;
default:
ret = do_arpt_get_ctl(sk, cmd, user, len);
@@ -1662,11 +1668,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = do_replace(user, len);
+ ret = do_replace(sk->sk_net, user, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len, 0);
+ ret = do_add_counters(sk->sk_net, user, len, 0);
break;
default:
@@ -1686,11 +1692,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
switch (cmd) {
case ARPT_SO_GET_INFO:
- ret = get_info(user, len, 0);
+ ret = get_info(sk->sk_net, user, len, 0);
break;
case ARPT_SO_GET_ENTRIES:
- ret = get_entries(user, len);
+ ret = get_entries(sk->sk_net, user, len);
break;
case ARPT_SO_GET_REVISION_TARGET: {
@@ -1719,19 +1725,21 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-int arpt_register_table(struct arpt_table *table,
- const struct arpt_replace *repl)
+struct arpt_table *arpt_register_table(struct net *net,
+ struct arpt_table *table,
+ const struct arpt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
struct xt_table_info bootstrap
= { 0, 0, 0, { 0 }, { 0 }, { } };
void *loc_cpu_entry;
+ struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo) {
ret = -ENOMEM;
- return ret;
+ goto out;
}
/* choose the copy on our node/cpu */
@@ -1745,24 +1753,27 @@ int arpt_register_table(struct arpt_table *table,
repl->underflow);
duprintf("arpt_register_table: translate table gives %d\n", ret);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
- }
+ if (ret != 0)
+ goto out_free;
- ret = xt_register_table(table, &bootstrap, newinfo);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
}
+ return new_table;
- return 0;
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
}
void arpt_unregister_table(struct arpt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
+ struct module *table_owner = table->me;
private = xt_unregister_table(table);
@@ -1770,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table)
loc_cpu_entry = private->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
cleanup_entry, NULL);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
xt_free_table_info(private);
}
@@ -1809,11 +1822,26 @@ static struct nf_sockopt_ops arpt_sockopts = {
.owner = THIS_MODULE,
};
+static int __net_init arp_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NF_ARP);
+}
+
+static void __net_exit arp_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NF_ARP);
+}
+
+static struct pernet_operations arp_tables_net_ops = {
+ .init = arp_tables_net_init,
+ .exit = arp_tables_net_exit,
+};
+
static int __init arp_tables_init(void)
{
int ret;
- ret = xt_proto_init(NF_ARP);
+ ret = register_pernet_subsys(&arp_tables_net_ops);
if (ret < 0)
goto err1;
@@ -1838,7 +1866,7 @@ err4:
err3:
xt_unregister_target(&arpt_standard_target);
err2:
- xt_proto_fini(NF_ARP);
+ unregister_pernet_subsys(&arp_tables_net_ops);
err1:
return ret;
}
@@ -1848,7 +1876,7 @@ static void __exit arp_tables_fini(void)
nf_unregister_sockopt(&arpt_sockopts);
xt_unregister_target(&arpt_error_target);
xt_unregister_target(&arpt_standard_target);
- xt_proto_fini(NF_ARP);
+ unregister_pernet_subsys(&arp_tables_net_ops);
}
EXPORT_SYMBOL(arpt_register_table);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 7201511d54d2..4e9c496a30c2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -20,7 +20,7 @@ static struct
struct arpt_replace repl;
struct arpt_standard entries[3];
struct arpt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
.repl = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
@@ -61,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(skb, hook, in, out, &packet_filter);
+ return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter);
}
static struct nf_hook_ops arpt_ops[] __read_mostly = {
@@ -85,12 +85,31 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
},
};
+static int __net_init arptable_filter_net_init(struct net *net)
+{
+ /* Register table */
+ net->ipv4.arptable_filter =
+ arpt_register_table(net, &packet_filter, &initial_table.repl);
+ if (IS_ERR(net->ipv4.arptable_filter))
+ return PTR_ERR(net->ipv4.arptable_filter);
+ return 0;
+}
+
+static void __net_exit arptable_filter_net_exit(struct net *net)
+{
+ arpt_unregister_table(net->ipv4.arptable_filter);
+}
+
+static struct pernet_operations arptable_filter_net_ops = {
+ .init = arptable_filter_net_init,
+ .exit = arptable_filter_net_exit,
+};
+
static int __init arptable_filter_init(void)
{
int ret;
- /* Register table */
- ret = arpt_register_table(&packet_filter, &initial_table.repl);
+ ret = register_pernet_subsys(&arptable_filter_net_ops);
if (ret < 0)
return ret;
@@ -100,14 +119,14 @@ static int __init arptable_filter_init(void)
return ret;
cleanup_table:
- arpt_unregister_table(&packet_filter);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
return ret;
}
static void __exit arptable_filter_fini(void)
{
nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
- arpt_unregister_table(&packet_filter);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
}
module_init(arptable_filter_init);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5109839da222..6bda1102851b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -512,6 +512,7 @@ static struct notifier_block ipq_nl_notifier = {
.notifier_call = ipq_rcv_nl_event,
};
+#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipq_sysctl_header;
static ctl_table ipq_table[] = {
@@ -525,7 +526,9 @@ static ctl_table ipq_table[] = {
},
{ .ctl_name = 0 }
};
+#endif
+#ifdef CONFIG_PROC_FS
static int ip_queue_show(struct seq_file *m, void *v)
{
read_lock_bh(&queue_lock);
@@ -562,6 +565,7 @@ static const struct file_operations ip_queue_proc_fops = {
.release = single_release,
.owner = THIS_MODULE,
};
+#endif
static const struct nf_queue_handler nfqh = {
.name = "ip_queue",
@@ -571,7 +575,7 @@ static const struct nf_queue_handler nfqh = {
static int __init ip_queue_init(void)
{
int status = -ENOMEM;
- struct proc_dir_entry *proc;
+ struct proc_dir_entry *proc __maybe_unused;
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
@@ -581,6 +585,7 @@ static int __init ip_queue_init(void)
goto cleanup_netlink_notifier;
}
+#ifdef CONFIG_PROC_FS
proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
if (proc) {
proc->owner = THIS_MODULE;
@@ -589,10 +594,11 @@ static int __init ip_queue_init(void)
printk(KERN_ERR "ip_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
-
+#endif
register_netdevice_notifier(&ipq_dev_notifier);
+#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
-
+#endif
status = nf_register_queue_handler(PF_INET, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip_queue: failed to register queue handler\n");
@@ -601,10 +607,12 @@ static int __init ip_queue_init(void)
return status;
cleanup_sysctl:
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ipq_sysctl_header);
+#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-cleanup_ipqnl:
+cleanup_ipqnl: __maybe_unused
netlink_kernel_release(ipqnl);
mutex_lock(&ipqnl_mutex);
mutex_unlock(&ipqnl_mutex);
@@ -620,7 +628,9 @@ static void __exit ip_queue_fini(void)
synchronize_net();
ipq_flush(NULL, 0);
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ipq_sysctl_header);
+#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 982b7f986291..600737f122d2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -291,7 +291,7 @@ static void trace_packet(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- char *tablename,
+ const char *tablename,
struct xt_table_info *private,
struct ipt_entry *e)
{
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
}
#endif
-static int get_info(void __user *user, int *len, int compat)
+static int get_info(struct net *net, void __user *user, int *len, int compat)
{
char name[IPT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -1112,7 +1112,7 @@ static int get_info(void __user *user, int *len, int compat)
if (compat)
xt_compat_lock(AF_INET);
#endif
- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
if (t && !IS_ERR(t)) {
struct ipt_getinfo info;
@@ -1152,7 +1152,7 @@ static int get_info(void __user *user, int *len, int compat)
}
static int
-get_entries(struct ipt_get_entries __user *uptr, int *len)
+get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
{
int ret;
struct ipt_get_entries get;
@@ -1170,7 +1170,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
return -EINVAL;
}
- t = xt_find_table_lock(AF_INET, get.name);
+ t = xt_find_table_lock(net, AF_INET, get.name);
if (t && !IS_ERR(t)) {
struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
@@ -1191,7 +1191,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
}
static int
-__do_replace(const char *name, unsigned int valid_hooks,
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table_info *newinfo, unsigned int num_counters,
void __user *counters_ptr)
{
@@ -1208,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1261,7 +1261,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
}
static int
-do_replace(void __user *user, unsigned int len)
+do_replace(struct net *net, void __user *user, unsigned int len)
{
int ret;
struct ipt_replace tmp;
@@ -1295,7 +1295,7 @@ do_replace(void __user *user, unsigned int len)
duprintf("ip_tables: Translated table\n");
- ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
goto free_newinfo_untrans;
@@ -1331,7 +1331,7 @@ add_counter_to_entry(struct ipt_entry *e,
}
static int
-do_add_counters(void __user *user, unsigned int len, int compat)
+do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1383,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat)
goto free;
}
- t = xt_find_table_lock(AF_INET, name);
+ t = xt_find_table_lock(net, AF_INET, name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1429,7 +1429,7 @@ struct compat_ipt_replace {
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
- compat_uint_t *size, struct xt_counters *counters,
+ unsigned int *size, struct xt_counters *counters,
unsigned int *i)
{
struct ipt_entry_target *t;
@@ -1476,7 +1476,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
- int *size, int *i)
+ int *size, unsigned int *i)
{
struct xt_match *match;
@@ -1534,7 +1534,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
struct ipt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
- int ret, off, h, j;
+ unsigned int j;
+ int ret, off, h;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
@@ -1647,7 +1648,8 @@ static int
compat_check_entry(struct ipt_entry *e, const char *name,
unsigned int *i)
{
- int j, ret;
+ unsigned int j;
+ int ret;
j = 0;
ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
@@ -1789,7 +1791,7 @@ out_unlock:
}
static int
-compat_do_replace(void __user *user, unsigned int len)
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
{
int ret;
struct compat_ipt_replace tmp;
@@ -1826,7 +1828,7 @@ compat_do_replace(void __user *user, unsigned int len)
duprintf("compat_do_replace: Translated table\n");
- ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
goto free_newinfo_untrans;
@@ -1850,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = compat_do_replace(user, len);
+ ret = compat_do_replace(sk->sk_net, user, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len, 1);
+ ret = do_add_counters(sk->sk_net, user, len, 1);
break;
default:
@@ -1903,7 +1905,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
}
static int
-compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
+compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
+ int *len)
{
int ret;
struct compat_ipt_get_entries get;
@@ -1924,7 +1927,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
}
xt_compat_lock(AF_INET);
- t = xt_find_table_lock(AF_INET, get.name);
+ t = xt_find_table_lock(net, AF_INET, get.name);
if (t && !IS_ERR(t)) {
struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1960,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IPT_SO_GET_INFO:
- ret = get_info(user, len, 1);
+ ret = get_info(sk->sk_net, user, len, 1);
break;
case IPT_SO_GET_ENTRIES:
- ret = compat_get_entries(user, len);
+ ret = compat_get_entries(sk->sk_net, user, len);
break;
default:
ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -1982,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = do_replace(user, len);
+ ret = do_replace(sk->sk_net, user, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len, 0);
+ ret = do_add_counters(sk->sk_net, user, len, 0);
break;
default:
@@ -2007,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IPT_SO_GET_INFO:
- ret = get_info(user, len, 0);
+ ret = get_info(sk->sk_net, user, len, 0);
break;
case IPT_SO_GET_ENTRIES:
- ret = get_entries(user, len);
+ ret = get_entries(sk->sk_net, user, len);
break;
case IPT_SO_GET_REVISION_MATCH:
@@ -2048,17 +2051,21 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
+struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
+ const struct ipt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
struct xt_table_info bootstrap
= { 0, 0, 0, { 0 }, { 0 }, { } };
void *loc_cpu_entry;
+ struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo)
- return -ENOMEM;
+ if (!newinfo) {
+ ret = -ENOMEM;
+ goto out;
+ }
/* choose the copy on our node/cpu, but dont care about preemption */
loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
@@ -2069,30 +2076,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
repl->num_entries,
repl->hook_entry,
repl->underflow);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
- }
+ if (ret != 0)
+ goto out_free;
- ret = xt_register_table(table, &bootstrap, newinfo);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
}
- return 0;
+ return new_table;
+
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
}
void ipt_unregister_table(struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
+ struct module *table_owner = table->me;
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
xt_free_table_info(private);
}
@@ -2200,11 +2213,26 @@ static struct xt_match icmp_matchstruct __read_mostly = {
.family = AF_INET,
};
+static int __net_init ip_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, AF_INET);
+}
+
+static void __net_exit ip_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, AF_INET);
+}
+
+static struct pernet_operations ip_tables_net_ops = {
+ .init = ip_tables_net_init,
+ .exit = ip_tables_net_exit,
+};
+
static int __init ip_tables_init(void)
{
int ret;
- ret = xt_proto_init(AF_INET);
+ ret = register_pernet_subsys(&ip_tables_net_ops);
if (ret < 0)
goto err1;
@@ -2234,7 +2262,7 @@ err4:
err3:
xt_unregister_target(&ipt_standard_target);
err2:
- xt_proto_fini(AF_INET);
+ unregister_pernet_subsys(&ip_tables_net_ops);
err1:
return ret;
}
@@ -2247,7 +2275,7 @@ static void __exit ip_tables_fini(void)
xt_unregister_target(&ipt_error_target);
xt_unregister_target(&ipt_standard_target);
- xt_proto_fini(AF_INET);
+ unregister_pernet_subsys(&ip_tables_net_ops);
}
EXPORT_SYMBOL(ipt_register_table);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1b31f7d14d46..c6cf84c77611 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -76,13 +76,6 @@ clusterip_config_put(struct clusterip_config *c)
kfree(c);
}
-/* increase the count of entries(rules) using/referencing this config */
-static inline void
-clusterip_config_entry_get(struct clusterip_config *c)
-{
- atomic_inc(&c->entries);
-}
-
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index e3154a99c08a..68cbe3ca01ce 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -212,11 +212,11 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in,
recent_entry_remove(t, e);
ret = !ret;
} else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) {
- unsigned long t = jiffies - info->seconds * HZ;
+ unsigned long time = jiffies - info->seconds * HZ;
unsigned int i, hits = 0;
for (i = 0; i < e->nstamps; i++) {
- if (info->seconds && time_after(t, e->stamps[i]))
+ if (info->seconds && time_after(time, e->stamps[i]))
continue;
if (++hits >= info->hit_count) {
ret = !ret;
@@ -320,6 +320,7 @@ struct recent_iter_state {
};
static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(recent_lock)
{
struct recent_iter_state *st = seq->private;
const struct recent_table *t = st->table;
@@ -352,6 +353,7 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void recent_seq_stop(struct seq_file *s, void *v)
+ __releases(recent_lock)
{
spin_unlock_bh(&recent_lock);
}
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 29bb4f9fbda0..69f3d7e6e96f 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -28,7 +28,7 @@ static struct
struct ipt_replace repl;
struct ipt_standard entries[3];
struct ipt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
.repl = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
@@ -69,7 +69,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
}
static unsigned int
@@ -88,7 +88,7 @@ ipt_local_out_hook(unsigned int hook,
return NF_ACCEPT;
}
- return ipt_do_table(skb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
}
static struct nf_hook_ops ipt_ops[] __read_mostly = {
@@ -119,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
static int forward = NF_ACCEPT;
module_param(forward, bool, 0000);
+static int __net_init iptable_filter_net_init(struct net *net)
+{
+ /* Register table */
+ net->ipv4.iptable_filter =
+ ipt_register_table(net, &packet_filter, &initial_table.repl);
+ if (IS_ERR(net->ipv4.iptable_filter))
+ return PTR_ERR(net->ipv4.iptable_filter);
+ return 0;
+}
+
+static void __net_exit iptable_filter_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_filter);
+}
+
+static struct pernet_operations iptable_filter_net_ops = {
+ .init = iptable_filter_net_init,
+ .exit = iptable_filter_net_exit,
+};
+
static int __init iptable_filter_init(void)
{
int ret;
@@ -131,8 +151,7 @@ static int __init iptable_filter_init(void)
/* Entry 1 is the FORWARD hook */
initial_table.entries[1].target.verdict = -forward - 1;
- /* Register table */
- ret = ipt_register_table(&packet_filter, &initial_table.repl);
+ ret = register_pernet_subsys(&iptable_filter_net_ops);
if (ret < 0)
return ret;
@@ -144,14 +163,14 @@ static int __init iptable_filter_init(void)
return ret;
cleanup_table:
- ipt_unregister_table(&packet_filter);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
return ret;
}
static void __exit iptable_filter_fini(void)
{
nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- ipt_unregister_table(&packet_filter);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
}
module_init(iptable_filter_init);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 5c4be202430c..c55a210853a7 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -33,7 +33,7 @@ static struct
struct ipt_replace repl;
struct ipt_standard entries[5];
struct ipt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
.repl = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
@@ -80,7 +80,7 @@ ipt_route_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, &packet_mangler);
+ return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
}
static unsigned int
@@ -112,7 +112,7 @@ ipt_local_hook(unsigned int hook,
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, hook, in, out, &packet_mangler);
+ ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
iph = ip_hdr(skb);
@@ -166,12 +166,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
},
};
+static int __net_init iptable_mangle_net_init(struct net *net)
+{
+ /* Register table */
+ net->ipv4.iptable_mangle =
+ ipt_register_table(net, &packet_mangler, &initial_table.repl);
+ if (IS_ERR(net->ipv4.iptable_mangle))
+ return PTR_ERR(net->ipv4.iptable_mangle);
+ return 0;
+}
+
+static void __net_exit iptable_mangle_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_mangle);
+}
+
+static struct pernet_operations iptable_mangle_net_ops = {
+ .init = iptable_mangle_net_init,
+ .exit = iptable_mangle_net_exit,
+};
+
static int __init iptable_mangle_init(void)
{
int ret;
- /* Register table */
- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
+ ret = register_pernet_subsys(&iptable_mangle_net_ops);
if (ret < 0)
return ret;
@@ -183,14 +202,14 @@ static int __init iptable_mangle_init(void)
return ret;
cleanup_table:
- ipt_unregister_table(&packet_mangler);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
return ret;
}
static void __exit iptable_mangle_fini(void)
{
nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- ipt_unregister_table(&packet_mangler);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
}
module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index dc34aa274533..e41fe8ca4e1c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -14,7 +14,7 @@ static struct
struct ipt_replace repl;
struct ipt_standard entries[2];
struct ipt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
.repl = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -52,7 +52,7 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
}
static unsigned int
@@ -70,7 +70,7 @@ ipt_local_hook(unsigned int hook,
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(skb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
}
/* 'raw' is the very first table. */
@@ -91,12 +91,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
},
};
+static int __net_init iptable_raw_net_init(struct net *net)
+{
+ /* Register table */
+ net->ipv4.iptable_raw =
+ ipt_register_table(net, &packet_raw, &initial_table.repl);
+ if (IS_ERR(net->ipv4.iptable_raw))
+ return PTR_ERR(net->ipv4.iptable_raw);
+ return 0;
+}
+
+static void __net_exit iptable_raw_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_raw);
+}
+
+static struct pernet_operations iptable_raw_net_ops = {
+ .init = iptable_raw_net_init,
+ .exit = iptable_raw_net_exit,
+};
+
static int __init iptable_raw_init(void)
{
int ret;
- /* Register table */
- ret = ipt_register_table(&packet_raw, &initial_table.repl);
+ ret = register_pernet_subsys(&iptable_raw_net_ops);
if (ret < 0)
return ret;
@@ -108,14 +127,14 @@ static int __init iptable_raw_init(void)
return ret;
cleanup_table:
- ipt_unregister_table(&packet_raw);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
return ret;
}
static void __exit iptable_raw_fini(void)
{
nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- ipt_unregister_table(&packet_raw);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ac3d61d8026e..a65b845c5f15 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -27,7 +27,8 @@
static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
- __be32 _addrs[2], *ap;
+ const __be32 *ap;
+ __be32 _addrs[2];
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
@@ -76,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
- struct iphdr _iph, *iph;
+ const struct iphdr *iph;
+ struct iphdr _iph;
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (iph == NULL)
@@ -111,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- struct nf_conn_help *help;
- struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
@@ -299,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = {
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
- struct inet_sock *inet = inet_sk(sk);
- struct nf_conntrack_tuple_hash *h;
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
NF_CT_TUPLE_U_BLANK(&tuple);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 543c02b74c96..089252e82c01 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -39,12 +39,14 @@ struct ct_iter_state {
static struct hlist_node *ct_get_first(struct seq_file *seq)
{
struct ct_iter_state *st = seq->private;
+ struct hlist_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- if (!hlist_empty(&nf_conntrack_hash[st->bucket]))
- return nf_conntrack_hash[st->bucket].first;
+ n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+ if (n)
+ return n;
}
return NULL;
}
@@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
{
struct ct_iter_state *st = seq->private;
- head = head->next;
+ head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
- head = nf_conntrack_hash[st->bucket].first;
+ head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
}
return head;
}
@@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
}
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
- read_lock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
return ct_get_idx(seq, *pos);
}
@@ -86,16 +89,17 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void ct_seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
{
- read_unlock_bh(&nf_conntrack_lock);
+ rcu_read_unlock();
}
static int ct_seq_show(struct seq_file *s, void *v)
{
const struct nf_conntrack_tuple_hash *hash = v;
const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
NF_CT_ASSERT(ct);
@@ -191,10 +195,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
- return nf_ct_expect_hash[st->bucket].first;
+ n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+ if (n)
+ return n;
}
return NULL;
}
@@ -204,11 +210,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
{
struct ct_expect_iter_state *st = seq->private;
- head = head->next;
+ head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = nf_ct_expect_hash[st->bucket].first;
+ head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
}
return head;
}
@@ -224,8 +230,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
}
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
- read_lock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
return ct_expect_get_idx(seq, *pos);
}
@@ -236,14 +243,15 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
- read_unlock_bh(&nf_conntrack_lock);
+ rcu_read_unlock();
}
static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *exp;
- struct hlist_node *n = v;
+ const struct hlist_node *n = v;
exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
@@ -324,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
- struct ip_conntrack_stat *st = v;
+ const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4004a04c5510..6873fddb3529 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -26,7 +26,8 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
- struct icmphdr _hdr, *hp;
+ const struct icmphdr *hp;
+ struct icmphdr _hdr;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
@@ -100,7 +101,7 @@ static int icmp_packet(struct nf_conn *ct,
}
/* Called when a new connection for this protocol found. */
-static int icmp_new(struct nf_conn *conntrack,
+static int icmp_new(struct nf_conn *ct,
const struct sk_buff *skb, unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
@@ -110,15 +111,15 @@ static int icmp_new(struct nf_conn *conntrack,
[ICMP_ADDRESS] = 1
};
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
+ || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
pr_debug("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
- NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+ ct->tuplehash[0].tuple.dst.u.icmp.type);
+ NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
return 0;
}
- atomic_set(&conntrack->proto.icmp.count, 0);
+ atomic_set(&ct->proto.icmp.count, 0);
return 1;
}
@@ -129,8 +130,8 @@ icmp_error_message(struct sk_buff *skb,
unsigned int hooknum)
{
struct nf_conntrack_tuple innertuple, origtuple;
- struct nf_conntrack_l4proto *innerproto;
- struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_l4proto *innerproto;
+ const struct nf_conntrack_tuple_hash *h;
NF_CT_ASSERT(skb->nfct == NULL);
@@ -176,7 +177,8 @@ static int
icmp_error(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
{
- struct icmphdr _ih, *icmph;
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index e53ae1ef8f5e..dd07362d2b8f 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -31,7 +31,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
-static DEFINE_RWLOCK(nf_nat_lock);
+static DEFINE_SPINLOCK(nf_nat_lock);
static struct nf_conntrack_l3proto *l3proto __read_mostly;
@@ -154,8 +154,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
struct nf_conn *ct;
struct hlist_node *n;
- read_lock_bh(&nf_nat_lock);
- hlist_for_each_entry(nat, n, &bysource[h], bysource) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
@@ -164,12 +164,12 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
result->dst = tuple->dst;
if (in_range(result, range)) {
- read_unlock_bh(&nf_nat_lock);
+ rcu_read_unlock();
return 1;
}
}
}
- read_unlock_bh(&nf_nat_lock);
+ rcu_read_unlock();
return 0;
}
@@ -330,12 +330,12 @@ nf_nat_setup_info(struct nf_conn *ct,
unsigned int srchash;
srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- write_lock_bh(&nf_nat_lock);
+ spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate exntension aera */
nat = nfct_nat(ct);
nat->ct = ct;
- hlist_add_head(&nat->bysource, &bysource[srchash]);
- write_unlock_bh(&nf_nat_lock);
+ hlist_add_head_rcu(&nat->bysource, &bysource[srchash]);
+ spin_unlock_bh(&nf_nat_lock);
}
/* It's done. */
@@ -521,14 +521,14 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
{
int ret = 0;
- write_lock_bh(&nf_nat_lock);
+ spin_lock_bh(&nf_nat_lock);
if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
ret = -EBUSY;
goto out;
}
rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
out:
- write_unlock_bh(&nf_nat_lock);
+ spin_unlock_bh(&nf_nat_lock);
return ret;
}
EXPORT_SYMBOL(nf_nat_protocol_register);
@@ -536,10 +536,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
/* Noone stores the protocol anywhere; simply delete it. */
void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
{
- write_lock_bh(&nf_nat_lock);
+ spin_lock_bh(&nf_nat_lock);
rcu_assign_pointer(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
- write_unlock_bh(&nf_nat_lock);
+ spin_unlock_bh(&nf_nat_lock);
synchronize_rcu();
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
@@ -594,10 +594,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
- write_lock_bh(&nf_nat_lock);
- hlist_del(&nat->bysource);
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
nat->ct = NULL;
- write_unlock_bh(&nf_nat_lock);
+ spin_unlock_bh(&nf_nat_lock);
}
static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
@@ -609,10 +609,10 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
return;
- write_lock_bh(&nf_nat_lock);
+ spin_lock_bh(&nf_nat_lock);
hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
new_nat->ct = ct;
- write_unlock_bh(&nf_nat_lock);
+ spin_unlock_bh(&nf_nat_lock);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -646,17 +646,13 @@ static int __init nf_nat_init(void)
}
/* Sew in builtin protocols. */
- write_lock_bh(&nf_nat_lock);
+ spin_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
- write_unlock_bh(&nf_nat_lock);
-
- for (i = 0; i < nf_nat_htable_size; i++) {
- INIT_HLIST_HEAD(&bysource[i]);
- }
+ spin_unlock_bh(&nf_nat_lock);
/* Initialize fake conntrack so that NAT will skip it */
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index a121989fdad7..ee47bf28c825 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -32,7 +32,8 @@ static int set_addr(struct sk_buff *skb,
__be32 ip;
__be16 port;
} __attribute__ ((__packed__)) buf;
- struct tcphdr _tcph, *th;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
buf.ip = ip;
buf.port = port;
@@ -99,7 +100,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data,
TransportAddress *taddr, int count)
{
- struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
int dir = CTINFO2DIR(ctinfo);
int i;
__be16 port;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4c0232842e75..ca57f47bbd25 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -44,8 +44,7 @@ adjust_tcp_sequence(u32 seq,
struct nf_nat_seq *this_way, *other_way;
struct nf_conn_nat *nat = nfct_nat(ct);
- pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
- ntohl(seq), seq);
+ pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq);
dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e63b944a2ebb..3a1e6d6afc0a 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp");
static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
- struct nf_conn *master = ct->master;
+ const struct nf_conn *master = ct->master;
struct nf_conntrack_expect *other_exp;
struct nf_conntrack_tuple t;
- struct nf_ct_pptp_master *ct_pptp_info;
- struct nf_nat_pptp *nat_pptp_info;
+ const struct nf_ct_pptp_master *ct_pptp_info;
+ const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
@@ -186,7 +186,7 @@ static void
pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
struct nf_conntrack_expect *expect_reply)
{
- struct nf_conn *ct = expect_orig->master;
+ const struct nf_conn *ct = expect_orig->master;
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
@@ -217,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq)
{
- struct nf_nat_pptp *nat_pptp_info;
+ const struct nf_nat_pptp *nat_pptp_info;
u_int16_t msg;
__be16 new_pcid;
unsigned int pcid_off;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9fa272e73113..a1e4da16da2e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -59,7 +59,7 @@ static int
gre_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
- const struct nf_conn *conntrack)
+ const struct nf_conn *ct)
{
static u_int16_t key;
__be16 *keyptr;
@@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* If there is no master conntrack we are not PPTP,
do not change tuples */
- if (!conntrack->master)
+ if (!ct->master)
return 0;
if (maniptype == IP_NAT_MANIP_SRC)
@@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
keyptr = &tuple->dst.u.gre.key;
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- pr_debug("%p: NATing GRE PPTP\n", conntrack);
+ pr_debug("%p: NATing GRE PPTP\n", ct);
min = 1;
range_size = 0xffff;
} else {
@@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
for (i = 0; i < range_size; i++, key++) {
*keyptr = htons(min + key % range_size);
- if (!nf_nat_used_tuple(tuple, conntrack))
+ if (!nf_nat_used_tuple(tuple, ct))
return 1;
}
- pr_debug("%p: no NAT mapping\n", conntrack);
+ pr_debug("%p: no NAT mapping\n", ct);
return 0;
}
@@ -104,7 +104,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
{
struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
unsigned int hdroff = iphdroff + iph->ihl * 4;
/* pgreh includes two optional 32bit fields which are not required
@@ -148,12 +148,12 @@ static const struct nf_nat_protocol gre = {
#endif
};
-int __init nf_nat_proto_gre_init(void)
+static int __init nf_nat_proto_gre_init(void)
{
return nf_nat_protocol_register(&gre);
}
-void __exit nf_nat_proto_gre_fini(void)
+static void __exit nf_nat_proto_gre_fini(void)
{
nf_nat_protocol_unregister(&gre);
}
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index a0e44c953cb6..03a02969aa57 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -57,7 +57,7 @@ icmp_manip_pkt(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct icmphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index da23e9fbe679..ffd5d1589eca 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 10df4db078af..4b8f49910ff2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -91,7 +91,7 @@ udp_manip_pkt(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 519182269e76..f8fda57ba20b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -58,13 +58,14 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table nat_table = {
+static struct xt_table __nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
.lock = RW_LOCK_UNLOCKED,
.me = THIS_MODULE,
.af = AF_INET,
};
+static struct xt_table *nat_table;
/* Source NAT */
static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -214,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
{
int ret;
- ret = ipt_do_table(skb, hooknum, in, out, &nat_table);
+ ret = ipt_do_table(skb, hooknum, in, out, nat_table);
if (ret == NF_ACCEPT) {
if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -248,9 +249,10 @@ int __init nf_nat_rule_init(void)
{
int ret;
- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
- if (ret != 0)
- return ret;
+ nat_table = ipt_register_table(&init_net, &__nat_table,
+ &nat_initial_table.repl);
+ if (IS_ERR(nat_table))
+ return PTR_ERR(nat_table);
ret = xt_register_target(&ipt_snat_reg);
if (ret != 0)
goto unregister_table;
@@ -264,7 +266,7 @@ int __init nf_nat_rule_init(void)
unregister_snat:
xt_unregister_target(&ipt_snat_reg);
unregister_table:
- ipt_unregister_table(&nat_table);
+ ipt_unregister_table(nat_table);
return ret;
}
@@ -273,5 +275,5 @@ void nf_nat_rule_cleanup(void)
{
xt_unregister_target(&ipt_dnat_reg);
xt_unregister_target(&ipt_snat_reg);
- ipt_unregister_table(&nat_table);
+ ipt_unregister_table(nat_table);
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 606a170bf4ca..b4c8d4968bb2 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -35,9 +35,9 @@ struct addr_map {
} addr[IP_CT_DIR_MAX];
};
-static void addr_map_init(struct nf_conn *ct, struct addr_map *map)
+static void addr_map_init(const struct nf_conn *ct, struct addr_map *map)
{
- struct nf_conntrack_tuple *t;
+ const struct nf_conntrack_tuple *t;
enum ip_conntrack_dir dir;
unsigned int n;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 07f2a49926d4..540ce6ae887c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -260,7 +260,7 @@ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
{
unsigned char ch;
- if (eoc == 0) {
+ if (eoc == NULL) {
if (!asn1_octet_decode(ctx, &ch))
return 0;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 1360a94766dd..b096e81500ae 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -24,7 +24,7 @@ static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp)
{
- struct nf_conn *ct = exp->master;
+ const struct nf_conn *ct = exp->master;
exp->saved_proto.udp.port
= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 85c08696abbe..a3002fe65b7f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -352,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
skb_reserve(skb, hh_len);
skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
skb->dst = dst_clone(&rt->u.dst);
skb_reset_network_header(skb);
@@ -544,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
{
struct flowi fl = { .oif = ipc.oif,
+ .mark = sk->sk_mark,
.nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = saddr,
@@ -860,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
struct hlist_node *node;
sk_for_each(sk, node, &state->h->ht[state->bucket])
- if (sk->sk_net == state->p.net &&
- sk->sk_family == state->family)
+ if (sk->sk_net == state->p.net)
goto found;
}
sk = NULL;
@@ -877,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
sk = sk_next(sk);
try_again:
;
- } while (sk && sk->sk_net != state->p.net &&
- sk->sk_family != state->family);
+ } while (sk && sk->sk_net != state->p.net);
if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
sk = sk_head(&state->h->ht[state->bucket]);
@@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v)
}
EXPORT_SYMBOL_GPL(raw_seq_stop);
-static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
{
struct inet_sock *inet = inet_sk(sp);
__be32 dest = inet->daddr,
@@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
__u16 destp = 0,
srcp = inet->num;
- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
i, src, srcp, dest, destp, sp->sk_state,
atomic_read(&sp->sk_wmem_alloc),
atomic_read(&sp->sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
- return tmpbuf;
}
-#define TMPSZ 128
-
static int raw_seq_show(struct seq_file *seq, void *v)
{
- char tmpbuf[TMPSZ+1];
-
if (v == SEQ_START_TOKEN)
- seq_printf(seq, "%-*s\n", TMPSZ-1,
- " sl local_address rem_address st tx_queue "
- "rx_queue tr tm->when retrnsmt uid timeout "
- "inode drops");
- else {
- struct raw_iter_state *state = raw_seq_private(seq);
-
- seq_printf(seq, "%-*s\n", TMPSZ-1,
- get_raw_sock(v, tmpbuf, state->bucket));
- }
+ seq_printf(seq, " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode drops\n");
+ else
+ raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
return 0;
}
@@ -972,27 +962,25 @@ static const struct seq_operations raw_seq_ops = {
.show = raw_seq_show,
};
-int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h,
- unsigned short family)
+int raw_seq_open(struct inode *ino, struct file *file,
+ struct raw_hashinfo *h, const struct seq_operations *ops)
{
int err;
struct raw_iter_state *i;
- err = seq_open_net(ino, file, &raw_seq_ops,
- sizeof(struct raw_iter_state));
+ err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
if (err < 0)
return err;
i = raw_seq_private((struct seq_file *)file->private_data);
i->h = h;
- i->family = family;
return 0;
}
EXPORT_SYMBOL_GPL(raw_seq_open);
static int raw_v4_seq_open(struct inode *inode, struct file *file)
{
- return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET);
+ return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
}
static const struct file_operations raw_seq_fops = {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 896c768e41a2..8842ecb9be48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -117,8 +117,6 @@
#define RT_GC_TIMEOUT (300*HZ)
-static int ip_rt_min_delay = 2 * HZ;
-static int ip_rt_max_delay = 10 * HZ;
static int ip_rt_max_size;
static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
static int ip_rt_gc_interval = 60 * HZ;
@@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
static int ip_rt_min_pmtu = 512 + 20 + 20;
static int ip_rt_min_advmss = 256;
static int ip_rt_secret_interval = 10 * 60 * HZ;
-static int ip_rt_flush_expected;
-static unsigned long rt_deadline;
#define RTprint(a...) printk(KERN_DEBUG a)
-static struct timer_list rt_flush_timer;
static void rt_worker_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
static struct timer_list rt_secret_timer;
@@ -169,6 +164,7 @@ static struct dst_ops ipv4_dst_ops = {
.update_pmtu = ip_rt_update_pmtu,
.local_out = ip_local_out,
.entry_size = sizeof(struct rtable),
+ .entries = ATOMIC_INIT(0),
};
#define ECN_OR_COST(class) TC_PRIO_##class
@@ -259,19 +255,16 @@ static inline void rt_hash_lock_init(void)
static struct rt_hash_bucket *rt_hash_table;
static unsigned rt_hash_mask;
static unsigned int rt_hash_log;
-static unsigned int rt_hash_rnd;
+static atomic_t rt_genid;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
(__raw_get_cpu_var(rt_cache_stat).field++)
-static int rt_intern_hash(unsigned hash, struct rtable *rth,
- struct rtable **res);
-
static unsigned int rt_hash_code(u32 daddr, u32 saddr)
{
- return (jhash_2words(daddr, saddr, rt_hash_rnd)
- & rt_hash_mask);
+ return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
+ & rt_hash_mask;
}
#define rt_hash(daddr, saddr, idx) \
@@ -281,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
int bucket;
+ int genid;
};
-static struct rtable *rt_cache_get_first(struct seq_file *seq)
+static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
{
struct rtable *r = NULL;
- struct rt_cache_iter_state *st = seq->private;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
- if (r)
- break;
+ r = rcu_dereference(rt_hash_table[st->bucket].chain);
+ while (r) {
+ if (r->rt_genid == st->genid)
+ return r;
+ r = rcu_dereference(r->u.dst.rt_next);
+ }
rcu_read_unlock_bh();
}
- return rcu_dereference(r);
+ return r;
}
-static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
+static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
{
- struct rt_cache_iter_state *st = seq->private;
-
r = r->u.dst.rt_next;
while (!r) {
rcu_read_unlock_bh();
@@ -313,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
return rcu_dereference(r);
}
-static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
+static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
{
- struct rtable *r = rt_cache_get_first(seq);
+ struct rtable *r = rt_cache_get_first(st);
if (r)
- while (pos && (r = rt_cache_get_next(seq, r)))
+ while (pos && (r = rt_cache_get_next(st, r))) {
+ if (r->rt_genid != st->genid)
+ continue;
--pos;
+ }
return pos ? NULL : r;
}
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
- return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ struct rt_cache_iter_state *st = seq->private;
+
+ if (*pos)
+ return rt_cache_get_idx(st, *pos - 1);
+ st->genid = atomic_read(&rt_genid);
+ return SEQ_START_TOKEN;
}
static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct rtable *r = NULL;
+ struct rtable *r;
+ struct rt_cache_iter_state *st = seq->private;
if (v == SEQ_START_TOKEN)
- r = rt_cache_get_first(seq);
+ r = rt_cache_get_first(st);
else
- r = rt_cache_get_next(seq, v);
+ r = rt_cache_get_next(st, v);
++*pos;
return r;
}
@@ -708,6 +711,11 @@ static void rt_check_expire(void)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
+ if (rth->rt_genid != atomic_read(&rt_genid)) {
+ *rthp = rth->u.dst.rt_next;
+ rt_free(rth);
+ continue;
+ }
if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */
if (time_before_eq(jiffies, rth->u.dst.expires)) {
@@ -732,83 +740,45 @@ static void rt_check_expire(void)
/*
* rt_worker_func() is run in process context.
- * If a whole flush was scheduled, it is done.
- * Else, we call rt_check_expire() to scan part of the hash table
+ * We call rt_check_expire() to scan part of the hash table.
*/
static void rt_worker_func(struct work_struct *work)
{
- if (ip_rt_flush_expected) {
- ip_rt_flush_expected = 0;
- rt_do_flush(1);
- } else
- rt_check_expire();
+ rt_check_expire();
schedule_delayed_work(&expires_work, ip_rt_gc_interval);
}
-/* This can run from both BH and non-BH contexts, the latter
- * in the case of a forced flush event.
+/*
+ * Perturbation of rt_genid by a small quantity [1..256]
+ * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
+ * many times (2^24) without giving a recent rt_genid.
+ * Jenkins hash is strong enough that little changes of rt_genid are OK.
*/
-static void rt_run_flush(unsigned long process_context)
+static void rt_cache_invalidate(void)
{
- rt_deadline = 0;
-
- get_random_bytes(&rt_hash_rnd, 4);
+ unsigned char shuffle;
- rt_do_flush(process_context);
+ get_random_bytes(&shuffle, sizeof(shuffle));
+ atomic_add(shuffle + 1U, &rt_genid);
}
-static DEFINE_SPINLOCK(rt_flush_lock);
-
+/*
+ * delay < 0 : invalidate cache (fast : entries will be deleted later)
+ * delay >= 0 : invalidate & flush cache (can be long)
+ */
void rt_cache_flush(int delay)
{
- unsigned long now = jiffies;
- int user_mode = !in_softirq();
-
- if (delay < 0)
- delay = ip_rt_min_delay;
-
- spin_lock_bh(&rt_flush_lock);
-
- if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
- long tmo = (long)(rt_deadline - now);
-
- /* If flush timer is already running
- and flush request is not immediate (delay > 0):
-
- if deadline is not achieved, prolongate timer to "delay",
- otherwise fire it at deadline time.
- */
-
- if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
- tmo = 0;
-
- if (delay > tmo)
- delay = tmo;
- }
-
- if (delay <= 0) {
- spin_unlock_bh(&rt_flush_lock);
- rt_run_flush(user_mode);
- return;
- }
-
- if (rt_deadline == 0)
- rt_deadline = now + ip_rt_max_delay;
-
- mod_timer(&rt_flush_timer, now+delay);
- spin_unlock_bh(&rt_flush_lock);
+ rt_cache_invalidate();
+ if (delay >= 0)
+ rt_do_flush(!in_softirq());
}
/*
- * We change rt_hash_rnd and ask next rt_worker_func() invocation
- * to perform a flush in process context
+ * We change rt_genid and let gc do the cleanup
*/
static void rt_secret_rebuild(unsigned long dummy)
{
- get_random_bytes(&rt_hash_rnd, 4);
- ip_rt_flush_expected = 1;
- cancel_delayed_work(&expires_work);
- schedule_delayed_work(&expires_work, HZ/10);
+ rt_cache_invalidate();
mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
}
@@ -885,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
while ((rth = *rthp) != NULL) {
- if (!rt_may_expire(rth, tmo, expire)) {
+ if (rth->rt_genid == atomic_read(&rt_genid) &&
+ !rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
continue;
@@ -966,6 +937,11 @@ restart:
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
+ if (rth->rt_genid != atomic_read(&rt_genid)) {
+ *rthp = rth->u.dst.rt_next;
+ rt_free(rth);
+ continue;
+ }
if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
/* Put it first */
*rthp = rth->u.dst.rt_next;
@@ -1131,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
static void rt_del(unsigned hash, struct rtable *rt)
{
- struct rtable **rthp;
+ struct rtable **rthp, *aux;
+ rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
- for (rthp = &rt_hash_table[hash].chain; *rthp;
- rthp = &(*rthp)->u.dst.rt_next)
- if (*rthp == rt) {
- *rthp = rt->u.dst.rt_next;
- rt_free(rt);
- break;
+ while ((aux = *rthp) != NULL) {
+ if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+ *rthp = aux->u.dst.rt_next;
+ rt_free(aux);
+ continue;
}
+ rthp = &aux->u.dst.rt_next;
+ }
spin_unlock_bh(rt_hash_lock_addr(hash));
}
@@ -1186,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
if (rth->fl.fl4_dst != daddr ||
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
- rth->fl.iif != 0) {
+ rth->fl.iif != 0 ||
+ rth->rt_genid != atomic_read(&rt_genid)) {
rthp = &rth->u.dst.rt_next;
continue;
}
@@ -1224,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->u.dst.neighbour = NULL;
rt->u.dst.hh = NULL;
rt->u.dst.xfrm = NULL;
-
+ rt->rt_genid = atomic_read(&rt_genid);
rt->rt_flags |= RTCF_REDIRECTED;
/* Gateway is different ... */
@@ -1445,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
rth->rt_src == iph->saddr &&
rth->fl.iif == 0 &&
!(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
- rth->u.dst.dev->nd_net == net) {
+ rth->u.dst.dev->nd_net == net &&
+ rth->rt_genid == atomic_read(&rt_genid)) {
unsigned short mtu = new_mtu;
if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1680,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.oif = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->rt_type = RTN_MULTICAST;
+ rth->rt_genid = atomic_read(&rt_genid);
rth->rt_flags = RTCF_MULTICAST;
+ rth->rt_type = RTN_MULTICAST;
if (our) {
rth->u.dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1820,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
rth->u.dst.input = ip_forward;
rth->u.dst.output = ip_output;
+ rth->rt_genid = atomic_read(&rt_genid);
rt_set_nexthop(rth, res, itag);
@@ -1980,6 +1962,7 @@ local_input:
goto e_nobufs;
rth->u.dst.output= ip_rt_bug;
+ rth->rt_genid = atomic_read(&rt_genid);
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
@@ -2071,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.oif == 0 &&
rth->fl.mark == skb->mark &&
rth->fl.fl4_tos == tos &&
- rth->u.dst.dev->nd_net == net) {
+ rth->u.dst.dev->nd_net == net &&
+ rth->rt_genid == atomic_read(&rt_genid)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
@@ -2199,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result,
rth->rt_spec_dst= fl->fl4_src;
rth->u.dst.output=ip_output;
+ rth->rt_genid = atomic_read(&rt_genid);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2471,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
- rth->u.dst.dev->nd_net == net) {
+ rth->u.dst.dev->nd_net == net &&
+ rth->rt_genid == atomic_read(&rt_genid)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -2498,6 +2484,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.check = ipv4_dst_check,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
.entry_size = sizeof(struct rtable),
+ .entries = ATOMIC_INIT(0),
};
@@ -2525,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
rt->idev = ort->idev;
if (rt->idev)
in_dev_hold(rt->idev);
+ rt->rt_genid = atomic_read(&rt_genid);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
@@ -2779,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
if (idx < s_idx)
continue;
+ if (rt->rt_genid != atomic_read(&rt_genid))
+ continue;
skb->dst = dst_clone(&rt->u.dst);
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
@@ -2848,24 +2838,6 @@ ctl_table ipv4_route_table[] = {
.strategy = &ipv4_sysctl_rtcache_flush_strategy,
},
{
- .ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
- .procname = "min_delay",
- .data = &ip_rt_min_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
- },
- {
- .ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
- .procname = "max_delay",
- .data = &ip_rt_max_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
- },
- {
.ctl_name = NET_IPV4_ROUTE_GC_THRESH,
.procname = "gc_thresh",
.data = &ipv4_dst_ops.gc_thresh,
@@ -3023,8 +2995,8 @@ int __init ip_rt_init(void)
{
int rc = 0;
- rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7)));
+ atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
+ (jiffies ^ (jiffies >> 7))));
#ifdef CONFIG_NET_CLS_ROUTE
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
@@ -3057,7 +3029,6 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
- setup_timer(&rt_flush_timer, rt_run_flush, 0);
setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
/* All the timers, started at system startup tend
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 82cdf23837e3..88286f35d1e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -185,7 +185,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
- if (ret == 0 && newval && newlen)
+ if (ret == 1 && newval && newlen)
ret = tcp_set_allowed_congestion_control(tbl.data);
kfree(tbl.data);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fa2c85ca5bc3..19c449f62672 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2153,7 +2153,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
- if (tcp_is_fack(tp) ||
+ if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9aea88b8d4fc..77c1939a2b0d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
return;
}
- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
- th->source, inet_iif(skb));
+ sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest,
+ iph->saddr, th->source, inet_iif(skb));
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
@@ -1503,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
- iph->daddr, th->dest, inet_iif(skb));
+ nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr,
+ th->source, iph->daddr, th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1661,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
- iph->daddr, th->dest, inet_iif(skb));
+ sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr,
+ th->source, iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1735,7 +1735,8 @@ do_time_wait:
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+ struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net,
+ &tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 89f0188885c7..ed750f9ceb07 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2564,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);
-EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2fb8d731026b..7ea1b67b6de1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -130,14 +130,14 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
atomic_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
-static inline int __udp_lib_lport_inuse(__u16 num,
+static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
const struct hlist_head udptable[])
{
struct sock *sk;
struct hlist_node *node;
sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
- if (sk->sk_hash == num)
+ if (sk->sk_net == net && sk->sk_hash == num)
return 1;
return 0;
}
@@ -159,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
struct hlist_head *head;
struct sock *sk2;
int error = 1;
+ struct net *net = sk->sk_net;
write_lock_bh(&udp_hash_lock);
@@ -198,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
/* 2nd pass: find hole in shortest hash chain */
rover = best;
for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
- if (! __udp_lib_lport_inuse(rover, udptable))
+ if (! __udp_lib_lport_inuse(net, rover, udptable))
goto gotit;
rover += UDP_HTABLE_SIZE;
if (rover > high)
@@ -218,6 +219,7 @@ gotit:
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
sk2 != sk &&
+ sk2->sk_net == net &&
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -261,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport,
- int dif, struct hlist_head udptable[])
+static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+ __be16 sport, __be32 daddr, __be16 dport,
+ int dif, struct hlist_head udptable[])
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
@@ -274,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+ if (sk->sk_net == net && sk->sk_hash == hnum &&
+ !ipv6_only_sock(sk)) {
int score = (sk->sk_family == PF_INET ? 1 : 0);
if (inet->rcv_saddr) {
if (inet->rcv_saddr != daddr)
@@ -361,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
int harderr;
int err;
- sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
- skb->dev->ifindex, udptable );
+ sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
+ iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -1185,8 +1188,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
- sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
- inet_iif(skb), udptable);
+ sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
+ uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
int ret = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3783e3ee56a4..10ed70491434 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -247,6 +247,7 @@ static struct dst_ops xfrm4_dst_ops = {
.local_out = __ip_local_out,
.gc_thresh = 1024,
.entry_size = sizeof(struct xfrm_dst),
+ .entries = ATOMIC_INIT(0),
};
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 326845195620..41f5982d2087 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x)
{
}
-static struct xfrm_type ipip_type = {
+static const struct xfrm_type ipip_type = {
.description = "IPIP",
.owner = THIS_MODULE,
.proto = IPPROTO_IPIP,
@@ -50,7 +50,7 @@ static struct xfrm_type ipip_type = {
static int xfrm_tunnel_rcv(struct sk_buff *skb)
{
- return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr);
+ return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
}
static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)