From 24e377a83220ef05c9b5bec7e01d65eed6609aa6 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:09 +0200 Subject: sched: add fair-user scheduler Enable user-id based fair group scheduling. This is useful for anyone who wants to test the group scheduler w/o having to enable CONFIG_CGROUPS. A separate scheduling group (i.e struct task_grp) is automatically created for every new user added to the system. Upon uid change for a task, it is made to move to the corresponding scheduling group. A /proc tunable (/proc/root_user_share) is also provided to tune root user's quota of cpu bandwidth. Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/user.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'kernel/user.c') diff --git a/kernel/user.c b/kernel/user.c index 9ca2848fc356..c6387fac932d 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -50,8 +50,41 @@ struct user_struct root_user = { .uid_keyring = &root_user_keyring, .session_keyring = &root_session_keyring, #endif +#ifdef CONFIG_FAIR_USER_SCHED + .tg = &init_task_grp, +#endif }; +#ifdef CONFIG_FAIR_USER_SCHED +static void sched_destroy_user(struct user_struct *up) +{ + sched_destroy_group(up->tg); +} + +static int sched_create_user(struct user_struct *up) +{ + int rc = 0; + + up->tg = sched_create_group(); + if (IS_ERR(up->tg)) + rc = -ENOMEM; + + return rc; +} + +static void sched_switch_user(struct task_struct *p) +{ + sched_move_task(p); +} + +#else /* CONFIG_FAIR_USER_SCHED */ + +static void sched_destroy_user(struct user_struct *up) { } +static int sched_create_user(struct user_struct *up) { return 0; } +static void sched_switch_user(struct task_struct *p) { } + +#endif /* CONFIG_FAIR_USER_SCHED */ + /* * These routines must be called with the uidhash spinlock held! */ @@ -109,6 +142,7 @@ void free_uid(struct user_struct *up) if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); + sched_destroy_user(up); key_put(up->uid_keyring); key_put(up->session_keyring); kmem_cache_free(uid_cachep, up); @@ -150,6 +184,13 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) return NULL; } + if (sched_create_user(new) < 0) { + key_put(new->uid_keyring); + key_put(new->session_keyring); + kmem_cache_free(uid_cachep, new); + return NULL; + } + /* * Before adding this, check whether we raced * on adding the same user already.. @@ -157,6 +198,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { + sched_destroy_user(new); key_put(new->uid_keyring); key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); @@ -184,6 +226,7 @@ void switch_uid(struct user_struct *new_user) atomic_dec(&old_user->processes); switch_uid_keyring(new_user); current->user = new_user; + sched_switch_user(current); /* * We need to synchronize with __sigqueue_alloc() -- cgit v1.2.3 From 4cf86d77f5942336e7cd9de874b38b3c83b54d5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: cleanup: rename task_grp to task_group cleanup: rename task_grp to task_group. No need to save two characters and 'grp' is annoying to read. Signed-off-by: Ingo Molnar --- kernel/user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/user.c') diff --git a/kernel/user.c b/kernel/user.c index c6387fac932d..0c9a7870d08f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,7 +51,7 @@ struct user_struct root_user = { .session_keyring = &root_session_keyring, #endif #ifdef CONFIG_FAIR_USER_SCHED - .tg = &init_task_grp, + .tg = &init_task_group, #endif }; -- cgit v1.2.3 From 5cb350baf580017da38199625b7365b1763d7180 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: group scheduling, sysfs tunables Add tunables in sysfs to modify a user's cpu share. A directory is created in sysfs for each new user in the system. /sys/kernel/uids//cpu_share Reading this file returns the cpu shares granted for the user. Writing into this file modifies the cpu share for the user. Only an administrator is allowed to modify a user's cpu share. Ex: # cd /sys/kernel/uids/ # cat 512/cpu_share 1024 # echo 2048 > 512/cpu_share # cat 512/cpu_share 2048 # Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- kernel/user.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 210 insertions(+), 30 deletions(-) (limited to 'kernel/user.c') diff --git a/kernel/user.c b/kernel/user.c index 0c9a7870d08f..74cadea8466f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -55,7 +55,41 @@ struct user_struct root_user = { #endif }; +/* + * These routines must be called with the uidhash spinlock held! + */ +static inline void uid_hash_insert(struct user_struct *up, + struct hlist_head *hashent) +{ + hlist_add_head(&up->uidhash_node, hashent); +} + +static inline void uid_hash_remove(struct user_struct *up) +{ + hlist_del_init(&up->uidhash_node); +} + +static inline struct user_struct *uid_hash_find(uid_t uid, + struct hlist_head *hashent) +{ + struct user_struct *user; + struct hlist_node *h; + + hlist_for_each_entry(user, h, hashent, uidhash_node) { + if (user->uid == uid) { + atomic_inc(&user->__count); + return user; + } + } + + return NULL; +} + #ifdef CONFIG_FAIR_USER_SCHED + +static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */ +static DEFINE_MUTEX(uids_mutex); + static void sched_destroy_user(struct user_struct *up) { sched_destroy_group(up->tg); @@ -77,42 +111,173 @@ static void sched_switch_user(struct task_struct *p) sched_move_task(p); } -#else /* CONFIG_FAIR_USER_SCHED */ +static inline void uids_mutex_lock(void) +{ + mutex_lock(&uids_mutex); +} -static void sched_destroy_user(struct user_struct *up) { } -static int sched_create_user(struct user_struct *up) { return 0; } -static void sched_switch_user(struct task_struct *p) { } +static inline void uids_mutex_unlock(void) +{ + mutex_unlock(&uids_mutex); +} -#endif /* CONFIG_FAIR_USER_SCHED */ +/* return cpu shares held by the user */ +ssize_t cpu_shares_show(struct kset *kset, char *buffer) +{ + struct user_struct *up = container_of(kset, struct user_struct, kset); -/* - * These routines must be called with the uidhash spinlock held! + return sprintf(buffer, "%lu\n", sched_group_shares(up->tg)); +} + +/* modify cpu shares held by the user */ +ssize_t cpu_shares_store(struct kset *kset, const char *buffer, size_t size) +{ + struct user_struct *up = container_of(kset, struct user_struct, kset); + unsigned long shares; + int rc; + + sscanf(buffer, "%lu", &shares); + + rc = sched_group_set_shares(up->tg, shares); + + return (rc ? rc : size); +} + +static void user_attr_init(struct subsys_attribute *sa, char *name, int mode) +{ + sa->attr.name = name; + sa->attr.mode = mode; + sa->show = cpu_shares_show; + sa->store = cpu_shares_store; +} + +/* Create "/sys/kernel/uids/" directory and + * "/sys/kernel/uids//cpu_share" file for this user. */ -static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) +static int user_kobject_create(struct user_struct *up) { - hlist_add_head(&up->uidhash_node, hashent); + struct kset *kset = &up->kset; + struct kobject *kobj = &kset->kobj; + int error; + + memset(kset, 0, sizeof(struct kset)); + kobj->parent = &uids_kobject; /* create under /sys/kernel/uids dir */ + kobject_set_name(kobj, "%d", up->uid); + kset_init(kset); + user_attr_init(&up->user_attr, "cpu_share", 0644); + + error = kobject_add(kobj); + if (error) + goto done; + + error = sysfs_create_file(kobj, &up->user_attr.attr); + if (error) + kobject_del(kobj); + +done: + return error; } -static inline void uid_hash_remove(struct user_struct *up) +/* create these in sysfs filesystem: + * "/sys/kernel/uids" directory + * "/sys/kernel/uids/0" directory (for root user) + * "/sys/kernel/uids/0/cpu_share" file (for root user) + */ +int __init uids_kobject_init(void) { - hlist_del_init(&up->uidhash_node); + int error; + + /* create under /sys/kernel dir */ + uids_kobject.parent = &kernel_subsys.kobj; + kobject_set_name(&uids_kobject, "uids"); + kobject_init(&uids_kobject); + + error = kobject_add(&uids_kobject); + if (!error) + error = user_kobject_create(&root_user); + + return error; } -static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) +/* work function to remove sysfs directory for a user and free up + * corresponding structures. + */ +static void remove_user_sysfs_dir(struct work_struct *w) { - struct user_struct *user; - struct hlist_node *h; + struct user_struct *up = container_of(w, struct user_struct, work); + struct kobject *kobj = &up->kset.kobj; + unsigned long flags; + int remove_user = 0; - hlist_for_each_entry(user, h, hashent, uidhash_node) { - if(user->uid == uid) { - atomic_inc(&user->__count); - return user; - } + /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() + * atomic. + */ + uids_mutex_lock(); + + local_irq_save(flags); + + if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { + uid_hash_remove(up); + remove_user = 1; + spin_unlock_irqrestore(&uidhash_lock, flags); + } else { + local_irq_restore(flags); } - return NULL; + if (!remove_user) + goto done; + + sysfs_remove_file(kobj, &up->user_attr.attr); + kobject_del(kobj); + + sched_destroy_user(up); + key_put(up->uid_keyring); + key_put(up->session_keyring); + kmem_cache_free(uid_cachep, up); + +done: + uids_mutex_unlock(); +} + +/* IRQs are disabled and uidhash_lock is held upon function entry. + * IRQ state (as stored in flags) is restored and uidhash_lock released + * upon function exit. + */ +static inline void free_user(struct user_struct *up, unsigned long flags) +{ + /* restore back the count */ + atomic_inc(&up->__count); + spin_unlock_irqrestore(&uidhash_lock, flags); + + INIT_WORK(&up->work, remove_user_sysfs_dir); + schedule_work(&up->work); } +#else /* CONFIG_FAIR_USER_SCHED */ + +static void sched_destroy_user(struct user_struct *up) { } +static int sched_create_user(struct user_struct *up) { return 0; } +static void sched_switch_user(struct task_struct *p) { } +static inline int user_kobject_create(struct user_struct *up) { return 0; } +static inline void uids_mutex_lock(void) { } +static inline void uids_mutex_unlock(void) { } + +/* IRQs are disabled and uidhash_lock is held upon function entry. + * IRQ state (as stored in flags) is restored and uidhash_lock released + * upon function exit. + */ +static inline void free_user(struct user_struct *up, unsigned long flags) +{ + uid_hash_remove(up); + spin_unlock_irqrestore(&uidhash_lock, flags); + sched_destroy_user(up); + key_put(up->uid_keyring); + key_put(up->session_keyring); + kmem_cache_free(uid_cachep, up); +} + +#endif /* CONFIG_FAIR_USER_SCHED */ + /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid(). @@ -139,16 +304,10 @@ void free_uid(struct user_struct *up) return; local_irq_save(flags); - if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { - uid_hash_remove(up); - spin_unlock_irqrestore(&uidhash_lock, flags); - sched_destroy_user(up); - key_put(up->uid_keyring); - key_put(up->session_keyring); - kmem_cache_free(uid_cachep, up); - } else { + if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) + free_user(up, flags); + else local_irq_restore(flags); - } } struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) @@ -156,6 +315,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) struct hlist_head *hashent = uidhashentry(ns, uid); struct user_struct *up; + /* Make uid_hash_find() + user_kobject_create() + uid_hash_insert() + * atomic. + */ + uids_mutex_lock(); + spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); spin_unlock_irq(&uidhash_lock); @@ -191,6 +355,15 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) return NULL; } + if (user_kobject_create(new)) { + sched_destroy_user(new); + key_put(new->uid_keyring); + key_put(new->session_keyring); + kmem_cache_free(uid_cachep, new); + uids_mutex_unlock(); + return NULL; + } + /* * Before adding this, check whether we raced * on adding the same user already.. @@ -198,7 +371,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - sched_destroy_user(new); + /* This case is not possible when CONFIG_FAIR_USER_SCHED + * is defined, since we serialize alloc_uid() using + * uids_mutex. Hence no need to call + * sched_destroy_user() or remove_user_sysfs_dir(). + */ key_put(new->uid_keyring); key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); @@ -209,6 +386,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_unlock_irq(&uidhash_lock); } + + uids_mutex_unlock(); + return up; } -- cgit v1.2.3 From fb7dde37ece82e13de383afd7042c45df67a9d17 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:18 +0200 Subject: sched: generate uevents for user creation/destruction Generate uevents when a user is being created/destroyed. These events can be used to configure cpu share of a new user. Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- kernel/user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/user.c') diff --git a/kernel/user.c b/kernel/user.c index 74cadea8466f..f0e561e6d085 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -174,6 +174,8 @@ static int user_kobject_create(struct user_struct *up) if (error) kobject_del(kobj); + kobject_uevent(kobj, KOBJ_ADD); + done: return error; } @@ -189,6 +191,7 @@ int __init uids_kobject_init(void) /* create under /sys/kernel dir */ uids_kobject.parent = &kernel_subsys.kobj; + uids_kobject.kset = &kernel_subsys; kobject_set_name(&uids_kobject, "uids"); kobject_init(&uids_kobject); @@ -228,6 +231,7 @@ static void remove_user_sysfs_dir(struct work_struct *w) goto done; sysfs_remove_file(kobj, &up->user_attr.attr); + kobject_uevent(kobj, KOBJ_REMOVE); kobject_del(kobj); sched_destroy_user(up); -- cgit v1.2.3