summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2013-06-07 08:48:57 -0700
committer David S. Miller <davem@davemloft.net> 2013-06-12 00:25:23 -0700
commit e9897071350bd9d94a56b5b6f79c85b1a98fc7e7 (patch)
tree d01026afb4450ef685722cd7a0fe1815336170ca
parent 64153ce0a7b61b2a5cacb01805cbf670142339e9 (diff)
igmp: use a hash table to speed up ip_check_mc_rcu()
After IP route cache removal, multicast applications using a lot of multicast addresses hit an O(N) behavior in ip_check_mc_rcu().

Add a per-in_device hash table to get faster lookups. This hash table is created only once mc_list holds at least 4 items (mc_count >= 4); below that, lookups keep walking mc_list.

Reported-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/igmp.h 1
-rw-r--r-- include/linux/inetdevice.h 5
-rw-r--r-- net/ipv4/devinet.c 1
-rw-r--r-- net/ipv4/igmp.c 73
4 files changed, 77 insertions, 3 deletions
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7f2bf1518480..e3362b5f13e8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -84,6 +84,7 @@ struct ip_mc_list {
struct ip_mc_list *next;
struct ip_mc_list __rcu *next_rcu;
};
+ struct ip_mc_list __rcu *next_hash;
struct timer_list timer;
int users;
atomic_t refcnt;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ea1e3b863890..b99cd23f3474 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -50,12 +50,17 @@ struct ipv4_devconf {
DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
};
+#define MC_HASH_SZ_LOG 9
+
struct in_device {
struct net_device *dev;
atomic_t refcnt;
int dead;
struct in_ifaddr *ifa_list; /* IP ifaddr chain */
+
struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */
+ struct ip_mc_list __rcu * __rcu *mc_hash;
+
int mc_count; /* Number of installed mcasts */
spinlock_t mc_tomb_lock;
struct ip_mc_list *mc_tomb;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b047e2d8a614..3469506c106d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev)
WARN_ON(idev->ifa_list);
WARN_ON(idev->mc_list);
+ kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 450f625361e4..f72011df9c59 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im)
* Multicast list managers
*/
+static u32 ip_mc_hash(const struct ip_mc_list *im) /* Bucket index for im in the per-device mc_hash table, derived from its group address. */
+{
+ return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); /* same hash_32()/MC_HASH_SZ_LOG pairing the lookup in ip_check_mc_rcu() uses */
+}
+
+static void ip_mc_hash_add(struct in_device *in_dev,
+ struct ip_mc_list *im) /* Link im into in_dev's multicast hash table, building the table the first time it is worthwhile. */
+{
+ struct ip_mc_list __rcu **mc_hash;
+ u32 hash;
+
+ mc_hash = rtnl_dereference(in_dev->mc_hash); /* updater-side read; presumably relies on the caller holding RTNL -- confirm */
+ if (mc_hash) { /* table already exists: push im onto the head of its bucket */
+ hash = ip_mc_hash(im);
+ im->next_hash = rtnl_dereference(mc_hash[hash]); /* set im's chain link before im becomes reachable */
+ rcu_assign_pointer(mc_hash[hash], im); /* publish im as the new bucket head */
+ return;
+ }
+
+ /* do not use a hash table for small number of items */
+ if (in_dev->mc_count < 4) /* with no table, ip_check_mc_rcu() walks mc_list instead */
+ return;
+
+ mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, /* (1 << MC_HASH_SZ_LOG) zeroed bucket pointers */
+ GFP_KERNEL);
+ if (!mc_hash) /* allocation failure is silent: in_dev->mc_hash stays NULL and lookups keep using mc_list */
+ return;
+
+ for_each_pmc_rtnl(in_dev, im) { /* reuses im as the cursor; presumably visits every entry on in_dev->mc_list, which the caller has already extended with the new entry */
+ hash = ip_mc_hash(im);
+ im->next_hash = rtnl_dereference(mc_hash[hash]);
+ RCU_INIT_POINTER(mc_hash[hash], im); /* in_dev->mc_hash is still NULL here, so this local table is not yet reachable via in_dev */
+ }
+
+ rcu_assign_pointer(in_dev->mc_hash, mc_hash); /* publish the fully populated table */
+}
+
+static void ip_mc_hash_remove(struct in_device *in_dev,
+ struct ip_mc_list *im) /* Unlink im from in_dev's multicast hash table, if a table exists. */
+{
+ struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash);
+ struct ip_mc_list *aux;
+
+ if (!mc_hash) /* no table was built for this device; nothing to unlink */
+ return;
+ mc_hash += ip_mc_hash(im); /* mc_hash now points at the head slot of im's bucket */
+ while ((aux = rtnl_dereference(*mc_hash)) != im) /* assumes im is on this chain: there is no NULL check on aux */
+ mc_hash = &aux->next_hash; /* step to the link field of the entry just examined */
+ *mc_hash = im->next_hash; /* splice im out; im->next_hash itself is left untouched -- presumably so RCU readers currently on im can keep walking (confirm) */
+}
+
/*
* A socket has joined a multicast group on device dev.
@@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
in_dev->mc_count++;
rcu_assign_pointer(in_dev->mc_list, im);
+ ip_mc_hash_add(in_dev, im);
+
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im->multiaddr);
#endif
@@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
ip = &i->next_rcu) {
if (i->multiaddr == addr) {
if (--i->users == 0) {
+ ip_mc_hash_remove(in_dev, i);
*ip = i->next_rcu;
in_dev->mc_count--;
igmp_group_dropped(i);
@@ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk)
int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
{
struct ip_mc_list *im;
+ struct ip_mc_list __rcu **mc_hash;
struct ip_sf_list *psf;
int rv = 0;
- for_each_pmc_rcu(in_dev, im) {
- if (im->multiaddr == mc_addr)
- break;
+ mc_hash = rcu_dereference(in_dev->mc_hash);
+ if (mc_hash) {
+ u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG);
+
+ for (im = rcu_dereference(mc_hash[hash]);
+ im != NULL;
+ im = rcu_dereference(im->next_hash)) {
+ if (im->multiaddr == mc_addr)
+ break;
+ }
+ } else {
+ for_each_pmc_rcu(in_dev, im) {
+ if (im->multiaddr == mc_addr)
+ break;
+ }
}
if (im && proto == IPPROTO_IGMP) {
rv = 1;