From e9dc86534051b78e41e5b746cccc291b57a3a311 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Sep 2007 13:02:17 +0200 Subject: [NET]: Make device event notification network namespace safe Every user of the network device notifiers is either a protocol stack or a pseudo device. If a protocol stack that does not have support for multiple network namespaces receives an event for a device that is not in the initial network namespace it quite possibly can get confused and do the wrong thing. To avoid problems until all of the protocol stacks are converted this patch modifies all netdev event handlers to ignore events on devices that are not in the initial network namespace. As the rest of the code is made network namespace aware these checks can be removed. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eff6bce453ee..cefb55ec3d62 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -860,6 +860,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; -- cgit v1.2.3 From b4b510290b056b86611757ce1175a230f1080f53 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Sep 2007 13:05:38 +0200 Subject: [NET]: Support multiple network namespaces with netlink Each netlink socket will live in exactly one network namespace, this includes the controlling kernel sockets. This patch updates all of the existing netlink protocols to only support the initial network namespace. Request by clients in other namespaces will get -ECONREFUSED. As they would if the kernel did not have the support for that netlink protocol compiled in. As each netlink protocol is updated to be multiple network namespace safe it can register multiple kernel sockets to acquire a presence in the rest of the network namespaces. The implementation in af_netlink is a simple filter implementation at hash table insertion and hash table look up time. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cefb55ec3d62..140bf7a8d877 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -816,8 +816,8 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, + NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) -- cgit v1.2.3 From 881d966b48b035ab3f3aeaae0f3d3f9b584f45b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 17 Sep 2007 11:56:21 -0700 Subject: [NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 140bf7a8d877..df17aab193b4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -334,7 +334,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt, colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(&init_net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; -- cgit v1.2.3 From 8f4c1f9b049df3be11090f1c2c4738700302acae Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 12 Sep 2007 14:44:36 +0200 Subject: [NETLINK]: Introduce nested and byteorder flag to netlink attribute This change allows the generic attribute interface to be used within the netfilter subsystem where this flag was initially introduced. The byte-order flag is yet unused, it's intended use is to allow automatic byte order convertions for all atomic types. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index df17aab193b4..f823ca34cb12 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -487,7 +487,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { - switch (attr->nla_type) { + switch (nla_type(attr)) { case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; -- cgit v1.2.3 From cd40b7d3983c708aabe3d3008ec64ffce56d33b0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 10 Oct 2007 21:15:29 -0700 Subject: [NET]: make netlink user -> kernel interface synchronious This patch make processing netlink user -> kernel messages synchronious. This change was inspired by the talk with Alexey Kuznetsov about current netlink messages processing. He says that he was badly wrong when introduced asynchronious user -> kernel communication. The call netlink_unicast is the only path to send message to the kernel netlink socket. But, unfortunately, it is also used to send data to the user. Before this change the user message has been attached to the socket queue and sk->sk_data_ready was called. The process has been blocked until all pending messages were processed. The bad thing is that this processing may occur in the arbitrary process context. This patch changes nlk->data_ready callback to get 1 skb and force packet processing right in the netlink_unicast. Kernel -> user path in netlink_unicast remains untouched. EINTR processing for in netlink_run_queue was changed. It forces rtnl_lock drop, but the process remains in the cycle until the message will be fully processed. So, there is no need to use this kludges now. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f823ca34cb12..a5cba2349605 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -62,6 +62,9 @@ static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static struct sock *fibnl = NULL; + + struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; @@ -811,13 +814,13 @@ static void nl_fib_input(struct sock *sk, int len) pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); } static void nl_fib_lookup_init(void) { - netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, - NULL, THIS_MODULE); + fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, + nl_fib_input, NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) -- cgit v1.2.3 From 28f7b0360f46eeb9eeee63d03bb5484918a54837 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Oct 2007 21:32:39 -0700 Subject: [NETLINK]: fib_frontend build fixes 1) fibnl needs to be declared outside of config ifdefs, and also should not be explicitly initialized to NULL 2) nl_fib_input() args are wrong for netlink_kernel_create() input method Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net/ipv4/fib_frontend.c') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a5cba2349605..78b514ba1414 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -49,6 +49,8 @@ #define FFprint(a...) printk(KERN_DEBUG a) +static struct sock *fibnl; + #ifndef CONFIG_IP_MULTIPLE_TABLES struct fib_table *ip_fib_local_table; @@ -62,9 +64,6 @@ static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -static struct sock *fibnl = NULL; - - struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; @@ -787,17 +786,12 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) } } -static void nl_fib_input(struct sock *sk, int len) +static void nl_fib_input(struct sk_buff *skb) { - struct sk_buff *skb = NULL; - struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; - u32 pid; + struct nlmsghdr *nlh; struct fib_table *tb; - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - return; + u32 pid; nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || -- cgit v1.2.3