1 files changed, 371 insertions, 0 deletions
diff --git a/net/core/flow.c b/net/core/flow.c
new file mode 100644
index 000000000000..f289570b15a3
--- /dev/null
+++ b/net/core/flow.c
@@ -0,0 +1,371 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <net/flow.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+struct flow_cache_entry {
+	struct flow_cache_entry	*next;
+	u16			family;
+	u8			dir;
+	struct flowi		key;
+	u32			genid;
+	void			*object;
+	atomic_t		*object_ref;
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+
+static u32 flow_hash_shift;
+#define flow_hash_size	(1 << flow_hash_shift)
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
+
+#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+
+static kmem_cache_t *flow_cachep;
+
+static int flow_lwm, flow_hwm;
+
+struct flow_percpu_info {
+	int hash_rnd_recalc;
+	u32 hash_rnd;
+	int count;
+} ____cacheline_aligned;
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
+
+#define flow_hash_rnd_recalc(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+#define flow_hash_rnd(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd)
+#define flow_count(cpu) \
+	(per_cpu(flow_hash_info, cpu).count)
+
+static struct timer_list flow_hash_rnd_timer;
+
+#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+
+struct flow_flush_info {
+	atomic_t cpuleft;
+	struct completion completion;
+};
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+
+#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	int i;
+
+	for_each_cpu(i)
+		flow_hash_rnd_recalc(i) = 1;
+
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+}
+
+static void __flow_cache_shrink(int cpu, int shrink_to)
+{
+	struct flow_cache_entry *fle, **flp;
+	int i;
+
+	for (i = 0; i < flow_hash_size; i++) {
+		int k = 0;
+
+		flp = &flow_table(cpu)[i];
+		while ((fle = *flp) != NULL && k < shrink_to) {
+			k++;
+			flp = &fle->next;
+		}
+		while ((fle = *flp) != NULL) {
+			*flp = fle->next;
+			if (fle->object)
+				atomic_dec(fle->object_ref);
+			kmem_cache_free(flow_cachep, fle);
+			flow_count(cpu)--;
+		}
+	}
+}
+
+static void flow_cache_shrink(int cpu)
+{
+	int shrink_to = flow_lwm / flow_hash_size;
+
+	__flow_cache_shrink(cpu, shrink_to);
+}
+
+static void flow_new_hash_rnd(int cpu)
+{
+	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+	flow_hash_rnd_recalc(cpu) = 0;
+
+	__flow_cache_shrink(cpu, 0);
+}
+
+static u32 flow_hash_code(struct flowi *key, int cpu)
+{
+	u32 *k = (u32 *) key;
+
+	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+		(flow_hash_size - 1));
+}
+
+#if (BITS_PER_LONG == 64)
+typedef u64 flow_compare_t;
+#else
+typedef u32 flow_compare_t;
+#endif
+
+extern void flowi_is_missized(void);
+
+/* I hear what you're saying, use memcmp.  But memcmp cannot make
+ * important assumptions that we can here, such as alignment and
+ * constant size.
+ */
+static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+{
+	flow_compare_t *k1, *k1_lim, *k2;
+	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
+
+	if (sizeof(struct flowi) % sizeof(flow_compare_t))
+		flowi_is_missized();
+
+	k1 = (flow_compare_t *) key1;
+	k1_lim = k1 + n_elem;
+
+	k2 = (flow_compare_t *) key2;
+
+	do {
+		if (*k1++ != *k2++)
+			return 1;
+	} while (k1 < k1_lim);
+
+	return 0;
+}
+
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			flow_resolve_t resolver)
+{
+	struct flow_cache_entry *fle, **head;
+	unsigned int hash;
+	int cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+
+	fle = NULL;
+	/* Packet really early in init?  Making flow_cache_init a
+	 * pre-smp initcall would solve this.  --RR */
+	if (!flow_table(cpu))
+		goto nocache;
+
+	if (flow_hash_rnd_recalc(cpu))
+		flow_new_hash_rnd(cpu);
+	hash = flow_hash_code(key, cpu);
+
+	head = &flow_table(cpu)[hash];
+	for (fle = *head; fle; fle = fle->next) {
+		if (fle->family == family &&
+		    fle->dir == dir &&
+		    flow_key_compare(key, &fle->key) == 0) {
+			if (fle->genid == atomic_read(&flow_cache_genid)) {
+				void *ret = fle->object;
+
+				if (ret)
+					atomic_inc(fle->object_ref);
+				local_bh_enable();
+
+				return ret;
+			}
+			break;
+		}
+	}
+
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {
+			fle->next = *head;
+			*head = fle;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			fle->object = NULL;
+			flow_count(cpu)++;
+		}
+	}
+
+nocache:
+	{
+		void *obj;
+		atomic_t *obj_ref;
+
+		resolver(key, family, dir, &obj, &obj_ref);
+
+		if (fle) {
+			fle->genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object)
+				atomic_dec(fle->object_ref);
+
+			fle->object = obj;
+			fle->object_ref = obj_ref;
+			if (obj)
+				atomic_inc(fle->object_ref);
+		}
+		local_bh_enable();
+
+		return obj;
+	}
+}
+
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	int i;
+	int cpu;
+
+	cpu = smp_processor_id();
+	for (i = 0; i < flow_hash_size; i++) {
+		struct flow_cache_entry *fle;
+
+		fle = flow_table(cpu)[i];
+		for (; fle; fle = fle->next) {
+			unsigned genid = atomic_read(&flow_cache_genid);
+
+			if (!fle->object || fle->genid == genid)
+				continue;
+
+			fle->object = NULL;
+			atomic_dec(fle->object_ref);
+		}
+	}
+
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	int cpu;
+	struct tasklet_struct *tasklet;
+
+	cpu = smp_processor_id();
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet->data = (unsigned long)info;
+	tasklet_schedule(tasklet);
+}
+
+void flow_cache_flush(void)
+{
+	struct flow_flush_info info;
+	static DECLARE_MUTEX(flow_flush_sem);
+
+	/* Don't want cpus going down or up during this. */
+	lock_cpu_hotplug();
+	down(&flow_flush_sem);
+	atomic_set(&info.cpuleft, num_online_cpus());
+	init_completion(&info.completion);
+
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+	flow_cache_flush_tasklet((unsigned long)&info);
+	local_bh_enable();
+
+	wait_for_completion(&info.completion);
+	up(&flow_flush_sem);
+	unlock_cpu_hotplug();
+}
+
+static void __devinit flow_cache_cpu_prepare(int cpu)
+{
+	struct tasklet_struct *tasklet;
+	unsigned long order;
+
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
+	     order++)
+		/* NOTHING */;
+
+	flow_table(cpu) = (struct flow_cache_entry **)
+		__get_free_pages(GFP_KERNEL, order);
+	if (!flow_table(cpu))
+		panic("NET: failed to allocate flow cache order %lu\n", order);
+
+	memset(flow_table(cpu), 0, PAGE_SIZE << order);
+
+	flow_hash_rnd_recalc(cpu) = 1;
+	flow_count(cpu) = 0;
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int flow_cache_cpu(struct notifier_block *nfb,
+			  unsigned long action,
+			  void *hcpu)
+{
+	if (action == CPU_DEAD)
+		__flow_cache_shrink((unsigned long)hcpu, 0);
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __init flow_cache_init(void)
+{
+	int i;
+
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+
+	if (!flow_cachep)
+		panic("NET: failed to allocate flow cache slab\n");
+
+	flow_hash_shift = 10;
+	flow_lwm = 2 * flow_hash_size;
+	flow_hwm = 4 * flow_hash_size;
+
+	init_timer(&flow_hash_rnd_timer);
+	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+
+	for_each_cpu(i)
+		flow_cache_cpu_prepare(i);
+
+	hotcpu_notifier(flow_cache_cpu, 0);
+	return 0;
+}
+
+module_init(flow_cache_init);
+
+EXPORT_SYMBOL(flow_cache_genid);
+EXPORT_SYMBOL(flow_cache_lookup);