From bbc2e3ef87851bc5430b2b4cf4ca3a2f29baeda6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 19 Oct 2012 13:56:53 -0700 Subject: pidns: remove recursion from free_pid_ns() free_pid_ns() operates in a recursive fashion: free_pid_ns(parent) put_pid_ns(parent) kref_put(&ns->kref, free_pid_ns); free_pid_ns thus if there was a huge nesting of namespaces the userspace may trigger avalanche calling of free_pid_ns leading to kernel stack exhausting and a panic eventually. This patch turns the recursion into an iterative loop. Based on a patch by Andrew Vagin. [akpm@linux-foundation.org: export put_pid_ns() to modules] Signed-off-by: Cyrill Gorcunov Cc: Andrew Vagin Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/pid_namespace.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'kernel/pid_namespace.c') diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 478bad2745e3..eb00be205811 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -133,19 +133,26 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old return create_pid_namespace(old_ns); } -void free_pid_ns(struct kref *kref) +static void free_pid_ns(struct kref *kref) { - struct pid_namespace *ns, *parent; + struct pid_namespace *ns; ns = container_of(kref, struct pid_namespace, kref); - - parent = ns->parent; destroy_pid_namespace(ns); +} - if (parent != NULL) - put_pid_ns(parent); +void put_pid_ns(struct pid_namespace *ns) +{ + struct pid_namespace *parent; + + while (ns != &init_pid_ns) { + parent = ns->parent; + if (!kref_put(&ns->kref, free_pid_ns)) + break; + ns = parent; + } } -EXPORT_SYMBOL_GPL(free_pid_ns); +EXPORT_SYMBOL_GPL(put_pid_ns); void zap_pid_ns_processes(struct pid_namespace *pid_ns) { -- cgit v1.2.3 From f2302505775fd13ba93f034206f1e2a587017929 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Thu, 25 Oct 2012 13:38:07 -0700 Subject: pidns: limit the nesting depth of pid namespaces 'struct pid' is a "variable sized struct" - a header with an array of upids at the end. The size of the array depends on a level (depth) of pid namespaces. Now a level of pidns is not limited, so 'struct pid' can be more than one page. Looks reasonable, that it should be less than a page. MAX_PIS_NS_LEVEL is not calculated from PAGE_SIZE, because in this case it depends on architectures, config options and it will be reduced, if someone adds a new fields in struct pid or struct upid. I suggest to set MAX_PIS_NS_LEVEL = 32, because it saves ability to expand "struct pid" and it's more than enough for all known for me use-cases. When someone finds a reasonable use case, we can add a config option or a sysctl parameter. In addition it will reduce the effect of another problem, when we have many nested namespaces and the oldest one starts dying. zap_pid_ns_processe will be called for each namespace and find_vpid will be called for each process in a namespace. find_vpid will be called minimum max_level^2 / 2 times. The reason of that is that when we found a bit in pidmap, we can't determine this pidns is top for this process or it isn't. vpid is a heavy operation, so a fork bomb, which create many nested namespace, can make a system inaccessible for a long time. For example my system becomes inaccessible for a few minutes with 4000 processes. [akpm@linux-foundation.org: return -EINVAL in response to excessive nesting, not -ENOMEM] Signed-off-by: Andrew Vagin Acked-by: Oleg Nesterov Cc: Cyrill Gorcunov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/pid_namespace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel/pid_namespace.c') diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eb00be205811..7b07cc0dfb75 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -71,12 +71,22 @@ err_alloc: return NULL; } +/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ +#define MAX_PID_NS_LEVEL 32 + static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; - int i, err = -ENOMEM; + int i; + int err; + + if (level > MAX_PID_NS_LEVEL) { + err = -EINVAL; + goto out; + } + err = -ENOMEM; ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); if (ns == NULL) goto out; -- cgit v1.2.3