Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bounds.c | 23
-rw-r--r-- | kernel/cpuset.c | 25
-rw-r--r-- | kernel/exit.c | 8
-rw-r--r-- | kernel/fork.c | 66
-rw-r--r-- | kernel/hrtimer.c | 34
-rw-r--r-- | kernel/kexec.c | 3
-rw-r--r-- | kernel/kprobes.c | 349
-rw-r--r-- | kernel/pid_namespace.c | 2
-rw-r--r-- | kernel/power/Kconfig | 10
-rw-r--r-- | kernel/power/Makefile | 1
-rw-r--r-- | kernel/power/console.c | 27
-rw-r--r-- | kernel/power/pm.c | 205
-rw-r--r-- | kernel/ptrace.c | 7
-rw-r--r-- | kernel/sched.c | 54
-rw-r--r-- | kernel/sys.c | 27
-rw-r--r-- | kernel/time/tick-sched.c | 1
16 files changed, 414 insertions, 428 deletions
diff --git a/kernel/bounds.c b/kernel/bounds.c new file mode 100644 index 000000000000..c3c55544db2f --- /dev/null +++ b/kernel/bounds.c @@ -0,0 +1,23 @@ +/* + * Generate definitions needed by the preprocessor. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#define __GENERATING_BOUNDS_H +/* Include headers that define the enum constants of interest */ +#include <linux/page-flags.h> +#include <linux/mmzone.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +void foo(void) +{ + /* The enum constants to put into include/linux/bounds.h */ + DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); + DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); + /* End of constants */ +} diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8b35fbd8292f..48a976c52cf5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -941,7 +941,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) cs->mems_generation = cpuset_mems_generation++; mutex_unlock(&callback_mutex); - cpuset_being_rebound = cs; /* causes mpol_copy() rebind */ + cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ fudge = 10; /* spare mmarray[] slots */ fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ @@ -992,7 +992,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) * rebind the vma mempolicies of each mm in mmarray[] to their * new cpuset, and release that mm. The mpol_rebind_mm() * call takes mmap_sem, which we couldn't take while holding - * tasklist_lock. Forks can happen again now - the mpol_copy() + * tasklist_lock. Forks can happen again now - the mpol_dup() * cpuset_being_rebound check will catch such forks, and rebind * their vma mempolicies too. Because we still hold the global * cgroup_mutex, we know that no other rebind effort will @@ -1265,7 +1265,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, return -E2BIG; /* +1 for nul-terminator */ - if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0) + buffer = kmalloc(nbytes + 1, GFP_KERNEL); + if (!buffer) return -ENOMEM; if (copy_from_user(buffer, userbuf, nbytes)) { @@ -1958,22 +1959,14 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) } /** - * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed - * @zl: the zonelist to be checked + * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed + * @nodemask: the nodemask to be checked * - * Are any of the nodes on zonelist zl allowed in current->mems_allowed? + * Are any of the nodes in the nodemask allowed in current->mems_allowed? 
*/ -int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) +int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { - int i; - - for (i = 0; zl->zones[i]; i++) { - int nid = zone_to_nid(zl->zones[i]); - - if (node_isset(nid, current->mems_allowed)) - return 1; - } - return 0; + return nodes_intersects(*nodemask, current->mems_allowed); } /* diff --git a/kernel/exit.c b/kernel/exit.c index cece89f80ab4..2a9d98c641ac 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -507,10 +507,9 @@ void put_files_struct(struct files_struct *files) } } -EXPORT_SYMBOL(put_files_struct); - -void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +void reset_files_struct(struct files_struct *files) { + struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; @@ -519,7 +518,6 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files) task_unlock(tsk); put_files_struct(old); } -EXPORT_SYMBOL(reset_files_struct); void exit_files(struct task_struct *tsk) { @@ -969,7 +967,7 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA - mpol_free(tsk->mempolicy); + mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif #ifdef CONFIG_FUTEX diff --git a/kernel/fork.c b/kernel/fork.c index 89fe414645e9..6067e429f281 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -279,7 +279,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (!tmp) goto fail_nomem; *tmp = *mpnt; - pol = mpol_copy(vma_policy(mpnt)); + pol = mpol_dup(vma_policy(mpnt)); retval = PTR_ERR(pol); if (IS_ERR(pol)) goto fail_nomem_policy; @@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. */ -static struct mm_struct *dup_mm(struct task_struct *tsk) +struct mm_struct *dup_mm(struct task_struct *tsk) { struct mm_struct *mm, *oldmm = current->mm; int err; @@ -805,12 +805,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) goto out; } - /* - * Note: we may be using current for both targets (See exec.c) - * This works because we cache current->files (old) as oldf. Don't - * break this. - */ - tsk->files = NULL; newf = dup_fd(oldf, &error); if (!newf) goto out; @@ -846,34 +840,6 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) return 0; } -/* - * Helper to unshare the files of the current task. - * We don't want to expose copy_files internals to - * the exec layer of the kernel. 
- */ - -int unshare_files(void) -{ - struct files_struct *files = current->files; - int rc; - - BUG_ON(!files); - - /* This can race but the race causes us to copy when we don't - need to and drop the copy */ - if(atomic_read(&files->count) == 1) - { - atomic_inc(&files->count); - return 0; - } - rc = copy_files(0, current); - if(rc) - current->files = files; - return rc; -} - -EXPORT_SYMBOL(unshare_files); - static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; @@ -1150,7 +1116,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->audit_context = NULL; cgroup_fork(p); #ifdef CONFIG_NUMA - p->mempolicy = mpol_copy(p->mempolicy); + p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; @@ -1408,7 +1374,7 @@ bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA - mpol_free(p->mempolicy); + mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); @@ -1811,3 +1777,27 @@ bad_unshare_cleanup_thread: bad_unshare_out: return err; } + +/* + * Helper to unshare the files of the current task. + * We don't want to expose copy_files internals to + * the exec layer of the kernel. + */ + +int unshare_files(struct files_struct **displaced) +{ + struct task_struct *task = current; + struct files_struct *copy = NULL; + int error; + + error = unshare_fd(CLONE_FILES, ©); + if (error || !copy) { + *displaced = NULL; + return error; + } + *displaced = task->files; + task_lock(task); + task->files = copy; + task_unlock(task); + return 0; +} diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f78777abe769..dea4c9124ac8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -590,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, list_add_tail(&timer->cb_entry, &base->cpu_base->cb_pending); timer->state = HRTIMER_STATE_PENDING; - raise_softirq(HRTIMER_SOFTIRQ); return 1; default: BUG(); @@ -633,6 +632,11 @@ static int hrtimer_switch_to_hres(void) return 1; } +static inline void hrtimer_raise_softirq(void) +{ + raise_softirq(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -651,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } +static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -850,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret; + int ret, raise; base = lock_hrtimer_base(timer, &flags); @@ -884,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); + /* + * The timer may be expired and moved to the cb_pending + * list. We can not raise the softirq with base lock held due + * to a possible deadlock with runqueue lock. + */ + raise = timer->state == HRTIMER_STATE_PENDING; + unlock_hrtimer_base(timer, &flags); + if (raise) + hrtimer_raise_softirq(); + return ret; } EXPORT_SYMBOL_GPL(hrtimer_start); @@ -1080,8 +1095,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) * If the timer was rearmed on another CPU, reprogram * the event device. 
*/ - if (timer->base->first == &timer->node) - hrtimer_reprogram(timer, timer->base); + struct hrtimer_clock_base *base = timer->base; + + if (base->first == &timer->node && + hrtimer_reprogram(timer, base)) { + /* + * Timer is expired. Thus move it from tree to + * pending list again. + */ + __remove_hrtimer(timer, base, + HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + } } } spin_unlock_irq(&cpu_base->lock); diff --git a/kernel/kexec.c b/kernel/kexec.c index 6782dce93d01..cb85c79989b4 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1405,6 +1405,9 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); VMCOREINFO_NUMBER(NR_FREE_PAGES); + VMCOREINFO_NUMBER(PG_lru); + VMCOREINFO_NUMBER(PG_private); + VMCOREINFO_NUMBER(PG_swapcache); arch_crash_save_vmcoreinfo(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index fcfb580c3afc..1e0250cb9486 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -72,6 +72,18 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +/* + * Normally, functions that we'd want to prohibit kprobes in, are marked + * __kprobes. But, there are cases where such functions already belong to + * a different section (__sched for preempt_schedule) + * + * For such cases, we now have a blacklist + */ +struct kprobe_blackpoint kprobe_blacklist[] = { + {"preempt_schedule",}, + {NULL} /* Terminator */ +}; + #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* * kprobe->ainsn.insn points to the copy of the instruction to be @@ -417,6 +429,21 @@ static inline void free_rp_inst(struct kretprobe *rp) } } +static void __kprobes cleanup_rp_inst(struct kretprobe *rp) +{ + unsigned long flags; + struct kretprobe_instance *ri; + struct hlist_node *pos, *next; + /* No race here */ + spin_lock_irqsave(&kretprobe_lock, flags); + hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { + ri->rp = NULL; + hlist_del(&ri->uflist); + } + spin_unlock_irqrestore(&kretprobe_lock, flags); + free_rp_inst(rp); +} + /* * Keep all fields in the kprobe consistent */ @@ -492,9 +519,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, static int __kprobes in_kprobes_functions(unsigned long addr) { + struct kprobe_blackpoint *kb; + if (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) return -EINVAL; + /* + * If there exists a kprobe_blacklist, verify and + * fail any probe registration in the prohibited area + */ + for (kb = kprobe_blacklist; kb->name != NULL; kb++) { + if (kb->start_addr) { + if (addr >= kb->start_addr && + addr < (kb->start_addr + kb->range)) + return -EINVAL; + } + } return 0; } @@ -555,6 +595,7 @@ static int __kprobes __register_kprobe(struct kprobe *p, } p->nmissed = 0; + INIT_LIST_HEAD(&p->list); mutex_lock(&kprobe_mutex); old_p = get_kprobe(p->addr); if (old_p) { @@ -581,35 +622,28 @@ out: return ret; } -int __kprobes register_kprobe(struct kprobe *p) -{ - return __register_kprobe(p, (unsigned long)__builtin_return_address(0)); -} - -void __kprobes unregister_kprobe(struct kprobe *p) +/* + * Unregister a kprobe without a scheduler synchronization. 
+ */ +static int __kprobes __unregister_kprobe_top(struct kprobe *p) { - struct module *mod; struct kprobe *old_p, *list_p; - int cleanup_p; - mutex_lock(&kprobe_mutex); old_p = get_kprobe(p->addr); - if (unlikely(!old_p)) { - mutex_unlock(&kprobe_mutex); - return; - } + if (unlikely(!old_p)) + return -EINVAL; + if (p != old_p) { list_for_each_entry_rcu(list_p, &old_p->list, list) if (list_p == p) /* kprobe p is a valid probe */ goto valid_p; - mutex_unlock(&kprobe_mutex); - return; + return -EINVAL; } valid_p: if (old_p == p || (old_p->pre_handler == aggr_pre_handler && - p->list.next == &old_p->list && p->list.prev == &old_p->list)) { + list_is_singular(&old_p->list))) { /* * Only probe on the hash list. Disarm only if kprobes are * enabled - otherwise, the breakpoint would already have @@ -618,43 +652,97 @@ valid_p: if (kprobe_enabled) arch_disarm_kprobe(p); hlist_del_rcu(&old_p->hlist); - cleanup_p = 1; } else { + if (p->break_handler) + old_p->break_handler = NULL; + if (p->post_handler) { + list_for_each_entry_rcu(list_p, &old_p->list, list) { + if ((list_p != p) && (list_p->post_handler)) + goto noclean; + } + old_p->post_handler = NULL; + } +noclean: list_del_rcu(&p->list); - cleanup_p = 0; } + return 0; +} - mutex_unlock(&kprobe_mutex); +static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) +{ + struct module *mod; + struct kprobe *old_p; - synchronize_sched(); if (p->mod_refcounted) { mod = module_text_address((unsigned long)p->addr); if (mod) module_put(mod); } - if (cleanup_p) { - if (p != old_p) { - list_del_rcu(&p->list); + if (list_empty(&p->list) || list_is_singular(&p->list)) { + if (!list_empty(&p->list)) { + /* "p" is the last child of an aggr_kprobe */ + old_p = list_entry(p->list.next, struct kprobe, list); + list_del(&p->list); kfree(old_p); } arch_remove_kprobe(p); - } else { - mutex_lock(&kprobe_mutex); - if (p->break_handler) - old_p->break_handler = NULL; - if (p->post_handler){ - list_for_each_entry_rcu(list_p, &old_p->list, list){ - if (list_p->post_handler){ - cleanup_p = 2; - break; - } - } - if (cleanup_p == 0) - old_p->post_handler = NULL; + } +} + +static int __register_kprobes(struct kprobe **kps, int num, + unsigned long called_from) +{ + int i, ret = 0; + + if (num <= 0) + return -EINVAL; + for (i = 0; i < num; i++) { + ret = __register_kprobe(kps[i], called_from); + if (ret < 0 && i > 0) { + unregister_kprobes(kps, i); + break; } - mutex_unlock(&kprobe_mutex); } + return ret; +} + +/* + * Registration and unregistration functions for kprobe. 
+ */ +int __kprobes register_kprobe(struct kprobe *p) +{ + return __register_kprobes(&p, 1, + (unsigned long)__builtin_return_address(0)); +} + +void __kprobes unregister_kprobe(struct kprobe *p) +{ + unregister_kprobes(&p, 1); +} + +int __kprobes register_kprobes(struct kprobe **kps, int num) +{ + return __register_kprobes(kps, num, + (unsigned long)__builtin_return_address(0)); +} + +void __kprobes unregister_kprobes(struct kprobe **kps, int num) +{ + int i; + + if (num <= 0) + return; + mutex_lock(&kprobe_mutex); + for (i = 0; i < num; i++) + if (__unregister_kprobe_top(kps[i]) < 0) + kps[i]->addr = NULL; + mutex_unlock(&kprobe_mutex); + + synchronize_sched(); + for (i = 0; i < num; i++) + if (kps[i]->addr) + __unregister_kprobe_bottom(kps[i]); } static struct notifier_block kprobe_exceptions_nb = { @@ -667,24 +755,69 @@ unsigned long __weak arch_deref_entry_point(void *entry) return (unsigned long)entry; } -int __kprobes register_jprobe(struct jprobe *jp) +static int __register_jprobes(struct jprobe **jps, int num, + unsigned long called_from) { - unsigned long addr = arch_deref_entry_point(jp->entry); + struct jprobe *jp; + int ret = 0, i; - if (!kernel_text_address(addr)) + if (num <= 0) return -EINVAL; + for (i = 0; i < num; i++) { + unsigned long addr; + jp = jps[i]; + addr = arch_deref_entry_point(jp->entry); + + if (!kernel_text_address(addr)) + ret = -EINVAL; + else { + /* Todo: Verify probepoint is a function entry point */ + jp->kp.pre_handler = setjmp_pre_handler; + jp->kp.break_handler = longjmp_break_handler; + ret = __register_kprobe(&jp->kp, called_from); + } + if (ret < 0 && i > 0) { + unregister_jprobes(jps, i); + break; + } + } + return ret; +} - /* Todo: Verify probepoint is a function entry point */ - jp->kp.pre_handler = setjmp_pre_handler; - jp->kp.break_handler = longjmp_break_handler; - - return __register_kprobe(&jp->kp, +int __kprobes register_jprobe(struct jprobe *jp) +{ + return __register_jprobes(&jp, 1, (unsigned long)__builtin_return_address(0)); } void __kprobes unregister_jprobe(struct jprobe *jp) { - unregister_kprobe(&jp->kp); + unregister_jprobes(&jp, 1); +} + +int __kprobes register_jprobes(struct jprobe **jps, int num) +{ + return __register_jprobes(jps, num, + (unsigned long)__builtin_return_address(0)); +} + +void __kprobes unregister_jprobes(struct jprobe **jps, int num) +{ + int i; + + if (num <= 0) + return; + mutex_lock(&kprobe_mutex); + for (i = 0; i < num; i++) + if (__unregister_kprobe_top(&jps[i]->kp) < 0) + jps[i]->kp.addr = NULL; + mutex_unlock(&kprobe_mutex); + + synchronize_sched(); + for (i = 0; i < num; i++) { + if (jps[i]->kp.addr) + __unregister_kprobe_bottom(&jps[i]->kp); + } } #ifdef CONFIG_KRETPROBES @@ -725,7 +858,8 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, return 0; } -int __kprobes register_kretprobe(struct kretprobe *rp) +static int __kprobes __register_kretprobe(struct kretprobe *rp, + unsigned long called_from) { int ret = 0; struct kretprobe_instance *inst; @@ -771,46 +905,101 @@ int __kprobes register_kretprobe(struct kretprobe *rp) rp->nmissed = 0; /* Establish function entry probe point */ - if ((ret = __register_kprobe(&rp->kp, - (unsigned long)__builtin_return_address(0))) != 0) + ret = __register_kprobe(&rp->kp, called_from); + if (ret != 0) free_rp_inst(rp); return ret; } +static int __register_kretprobes(struct kretprobe **rps, int num, + unsigned long called_from) +{ + int ret = 0, i; + + if (num <= 0) + return -EINVAL; + for (i = 0; i < num; i++) { + ret = __register_kretprobe(rps[i], 
called_from); + if (ret < 0 && i > 0) { + unregister_kretprobes(rps, i); + break; + } + } + return ret; +} + +int __kprobes register_kretprobe(struct kretprobe *rp) +{ + return __register_kretprobes(&rp, 1, + (unsigned long)__builtin_return_address(0)); +} + +void __kprobes unregister_kretprobe(struct kretprobe *rp) +{ + unregister_kretprobes(&rp, 1); +} + +int __kprobes register_kretprobes(struct kretprobe **rps, int num) +{ + return __register_kretprobes(rps, num, + (unsigned long)__builtin_return_address(0)); +} + +void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) +{ + int i; + + if (num <= 0) + return; + mutex_lock(&kprobe_mutex); + for (i = 0; i < num; i++) + if (__unregister_kprobe_top(&rps[i]->kp) < 0) + rps[i]->kp.addr = NULL; + mutex_unlock(&kprobe_mutex); + + synchronize_sched(); + for (i = 0; i < num; i++) { + if (rps[i]->kp.addr) { + __unregister_kprobe_bottom(&rps[i]->kp); + cleanup_rp_inst(rps[i]); + } + } +} + #else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } -static int __kprobes pre_handler_kretprobe(struct kprobe *p, - struct pt_regs *regs) +int __kprobes register_kretprobes(struct kretprobe **rps, int num) { - return 0; + return -ENOSYS; } -#endif /* CONFIG_KRETPROBES */ - void __kprobes unregister_kretprobe(struct kretprobe *rp) { - unsigned long flags; - struct kretprobe_instance *ri; - struct hlist_node *pos, *next; +} - unregister_kprobe(&rp->kp); +void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) +{ +} - /* No race here */ - spin_lock_irqsave(&kretprobe_lock, flags); - hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { - ri->rp = NULL; - hlist_del(&ri->uflist); - } - spin_unlock_irqrestore(&kretprobe_lock, flags); - free_rp_inst(rp); +static int __kprobes pre_handler_kretprobe(struct kprobe *p, + struct pt_regs *regs) +{ + return 0; } +#endif /* CONFIG_KRETPROBES */ + static int __init init_kprobes(void) { int i, err = 0; + unsigned long offset = 0, size = 0; + char *modname, namebuf[128]; + const char *symbol_name; + void *addr; + struct kprobe_blackpoint *kb; /* FIXME allocate the probe table, currently defined statically */ /* initialize all list heads */ @@ -819,6 +1008,28 @@ static int __init init_kprobes(void) INIT_HLIST_HEAD(&kretprobe_inst_table[i]); } + /* + * Lookup and populate the kprobe_blacklist. + * + * Unlike the kretprobe blacklist, we'll need to determine + * the range of addresses that belong to the said functions, + * since a kprobe need not necessarily be at the beginning + * of a function. 
+ */ + for (kb = kprobe_blacklist; kb->name != NULL; kb++) { + kprobe_lookup_name(kb->name, addr); + if (!addr) + continue; + + kb->start_addr = (unsigned long)addr; + symbol_name = kallsyms_lookup(kb->start_addr, + &size, &offset, &modname, namebuf); + if (!symbol_name) + kb->range = 0; + else + kb->range = size; + } + if (kretprobe_blacklist_size) { /* lookup the function address from its name */ for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { @@ -1066,8 +1277,12 @@ module_init(init_kprobes); EXPORT_SYMBOL_GPL(register_kprobe); EXPORT_SYMBOL_GPL(unregister_kprobe); +EXPORT_SYMBOL_GPL(register_kprobes); +EXPORT_SYMBOL_GPL(unregister_kprobes); EXPORT_SYMBOL_GPL(register_jprobe); EXPORT_SYMBOL_GPL(unregister_jprobe); +EXPORT_SYMBOL_GPL(register_jprobes); +EXPORT_SYMBOL_GPL(unregister_jprobes); #ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(jprobe_return); #endif @@ -1075,4 +1290,6 @@ EXPORT_SYMBOL_GPL(jprobe_return); #ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(register_kretprobe); EXPORT_SYMBOL_GPL(unregister_kretprobe); +EXPORT_SYMBOL_GPL(register_kretprobes); +EXPORT_SYMBOL_GPL(unregister_kretprobes); #endif diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6d792b66d854..5ca37fa50beb 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -92,7 +92,7 @@ static struct pid_namespace *create_pid_namespace(int level) atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); for (i = 1; i < PIDMAP_ENTRIES; i++) { - ns->pidmap[i].page = 0; + ns->pidmap[i].page = NULL; atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6233f3b4ae66..b45da40e8d25 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -19,16 +19,6 @@ config PM will issue the hlt instruction if nothing is to be done, thereby sending the processor to sleep and saving power. -config PM_LEGACY - bool "Legacy Power Management API (DEPRECATED)" - depends on PM - default n - ---help--- - Support for pm_register() and friends. This old API is obsoleted - by the driver model. - - If unsure, say N. - config PM_DEBUG bool "Power Management Debug Support" depends on PM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index f7dfff28ecdb..597823b5b700 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -4,7 +4,6 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := main.o -obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_PM_SLEEP) += process.o console.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o diff --git a/kernel/power/console.c b/kernel/power/console.c index 89bcf4973ee5..b8628be2a465 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -7,17 +7,39 @@ #include <linux/vt_kern.h> #include <linux/kbd_kern.h> #include <linux/console.h> +#include <linux/module.h> #include "power.h" #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) static int orig_fgconsole, orig_kmsg; +static int disable_vt_switch; + +/* + * Normally during a suspend, we allocate a new console and switch to it. + * When we resume, we switch back to the original console. This switch + * can be slow, so on systems where the framebuffer can handle restoration + * of video registers anyways, there's little point in doing the console + * switch. This function allows you to disable it by passing it '0'. 
+ */ +void pm_set_vt_switch(int do_switch) +{ + acquire_console_sem(); + disable_vt_switch = !do_switch; + release_console_sem(); +} +EXPORT_SYMBOL(pm_set_vt_switch); int pm_prepare_console(void) { acquire_console_sem(); + if (disable_vt_switch) { + release_console_sem(); + return 0; + } + orig_fgconsole = fg_console; if (vc_allocate(SUSPEND_CONSOLE)) { @@ -50,9 +72,12 @@ int pm_prepare_console(void) void pm_restore_console(void) { acquire_console_sem(); + if (disable_vt_switch) { + release_console_sem(); + return; + } set_console(orig_fgconsole); release_console_sem(); kmsg_redirect = orig_kmsg; - return; } #endif diff --git a/kernel/power/pm.c b/kernel/power/pm.c deleted file mode 100644 index 60c73fa670d5..000000000000 --- a/kernel/power/pm.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * pm.c - Power management interface - * - * Copyright (C) 2000 Andrew Henroid - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/pm.h> -#include <linux/pm_legacy.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> - -/* - * Locking notes: - * pm_devs_lock can be a semaphore providing pm ops are not called - * from an interrupt handler (already a bad idea so no change here). Each - * change must be protected so that an unlink of an entry doesn't clash - * with a pm send - which is permitted to sleep in the current architecture - * - * Module unloads clashing with pm events now work out safely, the module - * unload path will block until the event has been sent. It may well block - * until a resume but that will be fine. - */ - -static DEFINE_MUTEX(pm_devs_lock); -static LIST_HEAD(pm_devs); - -/** - * pm_register - register a device with power management - * @type: device type - * @id: device ID - * @callback: callback function - * - * Add a device to the list of devices that wish to be notified about - * power management events. A &pm_dev structure is returned on success, - * on failure the return is %NULL. - * - * The callback function will be called in process context and - * it may sleep. - */ - -struct pm_dev *pm_register(pm_dev_t type, - unsigned long id, - pm_callback callback) -{ - struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL); - if (dev) { - dev->type = type; - dev->id = id; - dev->callback = callback; - - mutex_lock(&pm_devs_lock); - list_add(&dev->entry, &pm_devs); - mutex_unlock(&pm_devs_lock); - } - return dev; -} - -/** - * pm_send - send request to a single device - * @dev: device to send to - * @rqst: power management request - * @data: data for the callback - * - * Issue a power management request to a given device. The - * %PM_SUSPEND and %PM_RESUME events are handled specially. The - * data field must hold the intended next state. 
No call is made - * if the state matches. - * - * BUGS: what stops two power management requests occurring in parallel - * and conflicting. - * - * WARNING: Calling pm_send directly is not generally recommended, in - * particular there is no locking against the pm_dev going away. The - * caller must maintain all needed locking or have 'inside knowledge' - * on the safety. Also remember that this function is not locked against - * pm_unregister. This means that you must handle SMP races on callback - * execution and unload yourself. - */ - -static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) -{ - int status = 0; - unsigned long prev_state, next_state; - - if (in_interrupt()) - BUG(); - - switch (rqst) { - case PM_SUSPEND: - case PM_RESUME: - prev_state = dev->state; - next_state = (unsigned long) data; - if (prev_state != next_state) { - if (dev->callback) - status = (*dev->callback)(dev, rqst, data); - if (!status) { - dev->state = next_state; - dev->prev_state = prev_state; - } - } - else { - dev->prev_state = prev_state; - } - break; - default: - if (dev->callback) - status = (*dev->callback)(dev, rqst, data); - break; - } - return status; -} - -/* - * Undo incomplete request - */ -static void pm_undo_all(struct pm_dev *last) -{ - struct list_head *entry = last->entry.prev; - while (entry != &pm_devs) { - struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); - if (dev->state != dev->prev_state) { - /* previous state was zero (running) resume or - * previous state was non-zero (suspended) suspend - */ - pm_request_t undo = (dev->prev_state - ? PM_SUSPEND:PM_RESUME); - pm_send(dev, undo, (void*) dev->prev_state); - } - entry = entry->prev; - } -} - -/** - * pm_send_all - send request to all managed devices - * @rqst: power management request - * @data: data for the callback - * - * Issue a power management request to a all devices. The - * %PM_SUSPEND events are handled specially. Any device is - * permitted to fail a suspend by returning a non zero (error) - * value from its callback function. If any device vetoes a - * suspend request then all other devices that have suspended - * during the processing of this request are restored to their - * previous state. - * - * WARNING: This function takes the pm_devs_lock. The lock is not dropped until - * the callbacks have completed. This prevents races against pm locking - * functions, races against module unload pm_unregister code. It does - * mean however that you must not issue pm_ functions within the callback - * or you will deadlock and users will hate you. - * - * Zero is returned on success. If a suspend fails then the status - * from the device that vetoes the suspend is returned. - * - * BUGS: what stops two power management requests occurring in parallel - * and conflicting. 
- */ - -int pm_send_all(pm_request_t rqst, void *data) -{ - struct list_head *entry; - - mutex_lock(&pm_devs_lock); - entry = pm_devs.next; - while (entry != &pm_devs) { - struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); - if (dev->callback) { - int status = pm_send(dev, rqst, data); - if (status) { - /* return devices to previous state on - * failed suspend request - */ - if (rqst == PM_SUSPEND) - pm_undo_all(dev); - mutex_unlock(&pm_devs_lock); - return status; - } - } - entry = entry->next; - } - mutex_unlock(&pm_devs_lock); - return 0; -} - -EXPORT_SYMBOL(pm_register); -EXPORT_SYMBOL(pm_send_all); - diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 67e392ed5496..dac4b4e57293 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) return (copied == sizeof(data)) ? 0 : -EIO; } -#ifdef CONFIG_COMPAT +#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE #include <linux/compat.h> int compat_ptrace_request(struct task_struct *child, compat_long_t request, @@ -667,7 +667,6 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, return ret; } -#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data) { @@ -710,6 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, unlock_kernel(); return ret; } -#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ - -#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */ diff --git a/kernel/sched.c b/kernel/sched.c index 0014b03adaca..740fb409e5bb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1657,42 +1657,6 @@ void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd) } /* - * Redistribute tg->shares amongst all tg->cfs_rq[]s. - */ -static void __aggregate_redistribute_shares(struct task_group *tg) -{ - int i, max_cpu = smp_processor_id(); - unsigned long rq_weight = 0; - unsigned long shares, max_shares = 0, shares_rem = tg->shares; - - for_each_possible_cpu(i) - rq_weight += tg->cfs_rq[i]->load.weight; - - for_each_possible_cpu(i) { - /* - * divide shares proportional to the rq_weights. - */ - shares = tg->shares * tg->cfs_rq[i]->load.weight; - shares /= rq_weight + 1; - - tg->cfs_rq[i]->shares = shares; - - if (shares > max_shares) { - max_shares = shares; - max_cpu = i; - } - shares_rem -= shares; - } - - /* - * Ensure it all adds up to tg->shares; we can loose a few - * due to rounding down when computing the per-cpu shares. - */ - if (shares_rem) - tg->cfs_rq[max_cpu]->shares += shares_rem; -} - -/* * Compute the weight of this group on the given cpus. */ static @@ -1701,18 +1665,11 @@ void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd) unsigned long shares = 0; int i; -again: for_each_cpu_mask(i, sd->span) shares += tg->cfs_rq[i]->shares; - /* - * When the span doesn't have any shares assigned, but does have - * tasks to run do a machine wide rebalance (should be rare). 
- */ - if (unlikely(!shares && aggregate(tg, sd)->rq_weight)) { - __aggregate_redistribute_shares(tg); - goto again; - } + if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares) + shares = tg->shares; aggregate(tg, sd)->shares = shares; } @@ -7991,11 +7948,6 @@ void __init sched_init_smp(void) #else void __init sched_init_smp(void) { -#if defined(CONFIG_NUMA) - sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), - GFP_KERNEL); - BUG_ON(sched_group_nodes_bycpu == NULL); -#endif sched_init_granularity(); } #endif /* CONFIG_SMP */ @@ -8128,7 +8080,7 @@ void __init sched_init(void) * we use alloc_bootmem(). */ if (alloc_size) { - ptr = (unsigned long)alloc_bootmem_low(alloc_size); + ptr = (unsigned long)alloc_bootmem(alloc_size); #ifdef CONFIG_FAIR_GROUP_SCHED init_task_group.se = (struct sched_entity **)ptr; diff --git a/kernel/sys.c b/kernel/sys.c index 6a0cc71ee88d..f2a451366953 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1632,10 +1632,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error; + long uninitialized_var(error); - error = security_task_prctl(option, arg2, arg3, arg4, arg5); - if (error) + if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; switch (option) { @@ -1688,17 +1687,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; - case PR_GET_KEEPCAPS: - if (current->keep_capabilities) - error = 1; - break; - case PR_SET_KEEPCAPS: - if (arg2 != 0 && arg2 != 1) { - error = -EINVAL; - break; - } - current->keep_capabilities = arg2; - break; case PR_SET_NAME: { struct task_struct *me = current; unsigned char ncomm[sizeof(me->comm)]; @@ -1732,17 +1720,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_SECCOMP: error = prctl_set_seccomp(arg2); break; - - case PR_CAPBSET_READ: - if (!cap_valid(arg2)) - return -EINVAL; - return !!cap_raised(current->cap_bset, arg2); - case PR_CAPBSET_DROP: -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES - return cap_prctl_drop(arg2); -#else - return -EINVAL; -#endif case PR_GET_TSC: error = GET_TSC_CTL(arg2); break; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d358d4e3a958..b854a895591e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -393,6 +393,7 @@ void tick_nohz_restart_sched_tick(void) sub_preempt_count(HARDIRQ_OFFSET); } + touch_softlockup_watchdog(); /* * Cancel the scheduled timer and restore the tick */ |
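
A note on the new kernel/bounds.c: it borrows the asm-offsets technique. DEFINE() emits a "->SYM value" marker into the compiler's assembly output, and a post-processing step in the build turns those markers into the generated header include/linux/bounds.h. The sketch below is illustrative only: the values are config-dependent and the exact formatting comes from the Kbuild rule, which is not part of this diff.

/*
 * With illustrative numbers, the asm output from bounds.c carries
 * marker lines such as (x86 shown):
 *
 *     ->NR_PAGEFLAGS $22 __NR_PAGEFLAGS
 *     ->MAX_NR_ZONES $4 __MAX_NR_ZONES
 *
 * and the generated include/linux/bounds.h then contains, roughly:
 *
 *     #define NR_PAGEFLAGS 22
 *     #define MAX_NR_ZONES 4
 *
 * which makes the enum-derived values visible to the preprocessor:
 */
#include <linux/bounds.h>

#if NR_PAGEFLAGS > 32			/* hypothetical check, for illustration */
# define MY_WIDE_PAGE_FLAGS 1
#endif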
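
The fork.c changes replace the old in-place unshare_files() with a version that hands the displaced descriptor table back to the caller, who is responsible for dropping it. A minimal sketch of the intended call pattern; the function name is made up for illustration and the header carrying the prototypes is an assumption, not something this diff shows.

#include <linux/fdtable.h>	/* assumption: declares unshare_files() and put_files_struct() */

static int my_exec_like_path(void)
{
	struct files_struct *displaced;
	int err;

	err = unshare_files(&displaced);	/* current->files is private afterwards */
	if (err)
		return err;

	/* ... work that must not see fds shared with other tasks ... */

	if (displaced)				/* NULL if the table was not shared */
		put_files_struct(displaced);
	return 0;
}

Note that the same patch drops the EXPORT_SYMBOL()s for put_files_struct(), reset_files_struct() and the old unshare_files(), so this pattern is for built-in code such as the exec path, not for modules.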
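
The kprobes rework splits unregistration into top and bottom halves and adds batch interfaces (register_kprobes(), unregister_kprobes(), plus the jprobe and kretprobe variants), so a set of probes shares a single synchronize_sched() instead of paying one grace period per probe. A sketch of the new array-based API from a module; the probed symbols and the handler are illustrative only.

#include <linux/module.h>
#include <linux/kprobes.h>

static int __kprobes my_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %s\n", p->symbol_name);
	return 0;
}

/* Illustrative probe points; any non-blacklisted kernel text would do. */
static struct kprobe kp_fork = { .symbol_name = "do_fork", .pre_handler = my_pre_handler };
static struct kprobe kp_exit = { .symbol_name = "do_exit", .pre_handler = my_pre_handler };
static struct kprobe *my_probes[] = { &kp_fork, &kp_exit };

static int __init my_probes_init(void)
{
	/* All-or-nothing: on failure the probes registered so far are removed. */
	return register_kprobes(my_probes, ARRAY_SIZE(my_probes));
}

static void __exit my_probes_exit(void)
{
	/* One synchronize_sched() covers the whole batch. */
	unregister_kprobes(my_probes, ARRAY_SIZE(my_probes));
}

module_init(my_probes_init);
module_exit(my_probes_exit);
MODULE_LICENSE("GPL");

The new kprobe_blacklist also means that registering a probe inside a listed function (currently preempt_schedule) now fails with -EINVAL, in addition to the existing __kprobes section check.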
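
Finally, the kernel/power/console.c hunk adds pm_set_vt_switch() so that drivers which restore video state themselves can skip the slow VT switch around suspend and resume. A one-line usage sketch; the surrounding driver function is hypothetical and the header assumed to carry the prototype is a guess.

#include <linux/pm.h>	/* assumption: where pm_set_vt_switch() is declared */

static void myfb_init_pm(void)
{
	/*
	 * This (hypothetical) framebuffer restores its video registers on
	 * resume, so the console switch to SUSPEND_CONSOLE buys nothing.
	 */
	pm_set_vt_switch(0);
}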