Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                   |   1
-rw-r--r--  kernel/cgroup.c                   |   4
-rw-r--r--  kernel/configs.c                  |   1
-rw-r--r--  kernel/debug/debug_core.c         | 139
-rw-r--r--  kernel/debug/debug_core.h         |   1
-rw-r--r--  kernel/debug/kdb/kdb_debugger.c   |   3
-rw-r--r--  kernel/debug/kdb/kdb_io.c         |   2
-rw-r--r--  kernel/debug/kdb/kdb_main.c       |  18
-rw-r--r--  kernel/debug/kdb/kdb_private.h    |  48
-rw-r--r--  kernel/early_res.c                | 590
-rw-r--r--  kernel/gcov/fs.c                  |   1
-rw-r--r--  kernel/kprobes.c                  |   1
-rw-r--r--  kernel/pm_qos_params.c            |   3
-rw-r--r--  kernel/profile.c                  |   1
-rw-r--r--  kernel/rtmutex-tester.c           |   6
-rw-r--r--  kernel/softirq.c                  |   2
-rw-r--r--  kernel/sys_ni.c                   |   1
-rw-r--r--  kernel/trace/blktrace.c           |  16
-rw-r--r--  kernel/trace/ftrace.c             |   2
-rw-r--r--  kernel/trace/ring_buffer.c        |   1
-rw-r--r--  kernel/trace/trace_events.c       |   6
-rw-r--r--  kernel/trace/trace_kdb.c          |   1
-rw-r--r--  kernel/trace/trace_kprobe.c       |   2
-rw-r--r--  kernel/trace/trace_stack.c        |   1
-rw-r--r--  kernel/workqueue.c                | 314
25 files changed, 312 insertions, 853 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e2c9d52cfe9e..0b5ff083fa22 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o jump_label.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 291ba3d04bea..7b69b8d0313d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -52,7 +52,6 @@ #include <linux/cgroupstats.h> #include <linux/hash.h> #include <linux/namei.h> -#include <linux/smp_lock.h> #include <linux/pid_namespace.h> #include <linux/idr.h> #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ @@ -1222,7 +1221,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; - lock_kernel(); mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -1255,7 +1253,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) kfree(opts.name); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); - unlock_kernel(); return ret; } @@ -1568,7 +1565,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, out_err: kfree(opts.release_agent); kfree(opts.name); - return ret; } diff --git a/kernel/configs.c b/kernel/configs.c index abaee684ecbf..b4066b44a99d 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -66,6 +66,7 @@ ikconfig_read_current(struct file *file, char __user *buf, static const struct file_operations ikconfig_file_ops = { .owner = THIS_MODULE, .read = ikconfig_read_current, + .llseek = default_llseek, }; static int __init ikconfig_init(void) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index de407c78178d..fec596da9bd0 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -47,6 +47,7 @@ #include <linux/pid.h> #include <linux/smp.h> #include <linux/mm.h> +#include <linux/rcupdate.h> #include <asm/cacheflush.h> #include <asm/byteorder.h> @@ -109,13 +110,15 @@ static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { */ atomic_t kgdb_active = ATOMIC_INIT(-1); EXPORT_SYMBOL_GPL(kgdb_active); +static DEFINE_RAW_SPINLOCK(dbg_master_lock); +static DEFINE_RAW_SPINLOCK(dbg_slave_lock); /* * We use NR_CPUs not PERCPU, in case kgdb is used to debug early * bootup code (which might not have percpu set up yet): */ -static atomic_t passive_cpu_wait[NR_CPUS]; -static atomic_t cpu_in_kgdb[NR_CPUS]; +static atomic_t masters_in_kgdb; +static atomic_t slaves_in_kgdb; static atomic_t kgdb_break_tasklet_var; atomic_t kgdb_setting_breakpoint; @@ -457,26 +460,32 @@ static int kgdb_reenter_check(struct kgdb_state *ks) return 1; } -static void dbg_cpu_switch(int cpu, int next_cpu) +static void dbg_touch_watchdogs(void) { - /* Mark the cpu we are switching away from as a slave when it - * holds the kgdb_active token. This must be done so that the - * that all the cpus wait in for the debug core will not enter - * again as the master. 
*/ - if (cpu == atomic_read(&kgdb_active)) { - kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; - kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER; - } - kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); } -static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) +static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, + int exception_state) { unsigned long flags; int sstep_tries = 100; int error; - int i, cpu; + int cpu; int trace_on = 0; + int online_cpus = num_online_cpus(); + + kgdb_info[ks->cpu].enter_kgdb++; + kgdb_info[ks->cpu].exception_state |= exception_state; + + if (exception_state == DCPU_WANT_MASTER) + atomic_inc(&masters_in_kgdb); + else + atomic_inc(&slaves_in_kgdb); + kgdb_disable_hw_debug(ks->linux_regs); + acquirelock: /* * Interrupts will be restored by the 'trap return' code, except when @@ -489,14 +498,15 @@ acquirelock: kgdb_info[cpu].task = current; kgdb_info[cpu].ret_state = 0; kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; - /* - * Make sure the above info reaches the primary CPU before - * our cpu_in_kgdb[] flag setting does: - */ - atomic_inc(&cpu_in_kgdb[cpu]); - if (exception_level == 1) + /* Make sure the above info reaches the primary CPU */ + smp_mb(); + + if (exception_level == 1) { + if (raw_spin_trylock(&dbg_master_lock)) + atomic_xchg(&kgdb_active, cpu); goto cpu_master_loop; + } /* * CPU will loop if it is a slave or request to become a kgdb @@ -508,10 +518,12 @@ cpu_loop: kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; goto cpu_master_loop; } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { - if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) + if (raw_spin_trylock(&dbg_master_lock)) { + atomic_xchg(&kgdb_active, cpu); break; + } } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { - if (!atomic_read(&passive_cpu_wait[cpu])) + if (!raw_spin_is_locked(&dbg_slave_lock)) goto return_normal; } else { return_normal: @@ -522,9 +534,12 @@ return_normal: arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); - atomic_dec(&cpu_in_kgdb[cpu]); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + kgdb_info[cpu].exception_state &= + ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); + kgdb_info[cpu].enter_kgdb--; + smp_mb__before_atomic_dec(); + atomic_dec(&slaves_in_kgdb); + dbg_touch_watchdogs(); local_irq_restore(flags); return 0; } @@ -541,8 +556,8 @@ return_normal: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); local_irq_restore(flags); goto acquirelock; @@ -563,16 +578,12 @@ return_normal: if (dbg_io_ops->pre_exception) dbg_io_ops->pre_exception(); - kgdb_disable_hw_debug(ks->linux_regs); - /* * Get the passive CPU lock which will hold all the non-primary * CPU in a spin state while the debugger is active */ - if (!kgdb_single_step) { - for (i = 0; i < NR_CPUS; i++) - atomic_inc(&passive_cpu_wait[i]); - } + if (!kgdb_single_step) + raw_spin_lock(&dbg_slave_lock); #ifdef CONFIG_SMP /* Signal the other CPUs to enter kgdb_wait() */ @@ -583,10 +594,9 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - for_each_online_cpu(i) { - while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i])) - cpu_relax(); - } + while (kgdb_do_roundup && 
(atomic_read(&masters_in_kgdb) + + atomic_read(&slaves_in_kgdb)) != online_cpus) + cpu_relax(); /* * At this point the primary processor is completely @@ -615,7 +625,8 @@ cpu_master_loop: if (error == DBG_PASS_EVENT) { dbg_kdb_mode = !dbg_kdb_mode; } else if (error == DBG_SWITCH_CPU_EVENT) { - dbg_cpu_switch(cpu, dbg_switch_cpu); + kgdb_info[dbg_switch_cpu].exception_state |= + DCPU_NEXT_MASTER; goto cpu_loop; } else { kgdb_info[cpu].ret_state = error; @@ -627,24 +638,11 @@ cpu_master_loop: if (dbg_io_ops->post_exception) dbg_io_ops->post_exception(); - atomic_dec(&cpu_in_kgdb[ks->cpu]); - if (!kgdb_single_step) { - for (i = NR_CPUS-1; i >= 0; i--) - atomic_dec(&passive_cpu_wait[i]); - /* - * Wait till all the CPUs have quit from the debugger, - * but allow a CPU that hit an exception and is - * waiting to become the master to remain in the debug - * core. - */ - for_each_online_cpu(i) { - while (kgdb_do_roundup && - atomic_read(&cpu_in_kgdb[i]) && - !(kgdb_info[i].exception_state & - DCPU_WANT_MASTER)) - cpu_relax(); - } + raw_spin_unlock(&dbg_slave_lock); + /* Wait till all the CPUs have quit from the debugger. */ + while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb)) + cpu_relax(); } kgdb_restore: @@ -655,12 +653,20 @@ kgdb_restore: else kgdb_sstep_pid = 0; } + if (arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); + + kgdb_info[cpu].exception_state &= + ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); + kgdb_info[cpu].enter_kgdb--; + smp_mb__before_atomic_dec(); + atomic_dec(&masters_in_kgdb); /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); local_irq_restore(flags); return kgdb_info[cpu].ret_state; @@ -678,7 +684,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) { struct kgdb_state kgdb_var; struct kgdb_state *ks = &kgdb_var; - int ret; ks->cpu = raw_smp_processor_id(); ks->ex_vector = evector; @@ -689,11 +694,10 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) if (kgdb_reenter_check(ks)) return 0; /* Ouch, double exception ! 
*/ - kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; - ret = kgdb_cpu_enter(ks, regs); - kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | - DCPU_IS_SLAVE); - return ret; + if (kgdb_info[ks->cpu].enter_kgdb != 0) + return 0; + + return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); } int kgdb_nmicallback(int cpu, void *regs) @@ -706,12 +710,9 @@ int kgdb_nmicallback(int cpu, void *regs) ks->cpu = cpu; ks->linux_regs = regs; - if (!atomic_read(&cpu_in_kgdb[cpu]) && - atomic_read(&kgdb_active) != -1 && - atomic_read(&kgdb_active) != cpu) { - kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; - kgdb_cpu_enter(ks, regs); - kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; + if (kgdb_info[ks->cpu].enter_kgdb == 0 && + raw_spin_is_locked(&dbg_master_lock)) { + kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE); return 0; } #endif diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index c5d753d80f67..3494c28a7e7a 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h @@ -40,6 +40,7 @@ struct debuggerinfo_struct { int exception_state; int ret_state; int irq_depth; + int enter_kgdb; }; extern struct debuggerinfo_struct kgdb_info[]; diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index bf6e8270e957..dd0b1b7dd02c 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -86,7 +86,7 @@ int kdb_stub(struct kgdb_state *ks) } /* Set initial kdb state variables */ KDB_STATE_CLEAR(KGDB_TRANS); - kdb_initial_cpu = ks->cpu; + kdb_initial_cpu = atomic_read(&kgdb_active); kdb_current_task = kgdb_info[ks->cpu].task; kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; /* Remove any breakpoints as needed by kdb and clear single step */ @@ -105,7 +105,6 @@ int kdb_stub(struct kgdb_state *ks) ks->pass_exception = 1; KDB_FLAG_SET(CATASTROPHIC); } - kdb_initial_cpu = ks->cpu; if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { KDB_STATE_CLEAR(SSBPT); KDB_STATE_CLEAR(DOING_SS); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index c9b7f4f90bba..96fdaac46a80 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -823,4 +823,4 @@ int kdb_printf(const char *fmt, ...) return r; } - +EXPORT_SYMBOL_GPL(kdb_printf); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index caf057a3de0e..d7bda21a106b 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1749,13 +1749,13 @@ static int kdb_go(int argc, const char **argv) int nextarg; long offset; + if (raw_smp_processor_id() != kdb_initial_cpu) { + kdb_printf("go must execute on the entry cpu, " + "please use \"cpu %d\" and then execute go\n", + kdb_initial_cpu); + return KDB_BADCPUNUM; + } if (argc == 1) { - if (raw_smp_processor_id() != kdb_initial_cpu) { - kdb_printf("go <address> must be issued from the " - "initial cpu, do cpu %d first\n", - kdb_initial_cpu); - return KDB_ARGCOUNT; - } nextarg = 1; diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL); @@ -2783,6 +2783,8 @@ int kdb_register_repeat(char *cmd, return 0; } +EXPORT_SYMBOL_GPL(kdb_register_repeat); + /* * kdb_register - Compatibility register function for commands that do @@ -2805,6 +2807,7 @@ int kdb_register(char *cmd, return kdb_register_repeat(cmd, func, usage, help, minlen, KDB_REPEAT_NONE); } +EXPORT_SYMBOL_GPL(kdb_register); /* * kdb_unregister - This function is used to unregister a kernel @@ -2823,7 +2826,7 @@ int kdb_unregister(char *cmd) /* * find the command. 
*/ - for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) { + for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kp->cmd_name = NULL; return 0; @@ -2833,6 +2836,7 @@ int kdb_unregister(char *cmd) /* Couldn't find it. */ return 1; } +EXPORT_SYMBOL_GPL(kdb_unregister); /* Initialize the kdb command table. */ static void __init kdb_inittab(void) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index be775f7e81e0..35d69ed1dfb5 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -15,29 +15,6 @@ #include <linux/kgdb.h> #include "../debug_core.h" -/* Kernel Debugger Error codes. Must not overlap with command codes. */ -#define KDB_NOTFOUND (-1) -#define KDB_ARGCOUNT (-2) -#define KDB_BADWIDTH (-3) -#define KDB_BADRADIX (-4) -#define KDB_NOTENV (-5) -#define KDB_NOENVVALUE (-6) -#define KDB_NOTIMP (-7) -#define KDB_ENVFULL (-8) -#define KDB_ENVBUFFULL (-9) -#define KDB_TOOMANYBPT (-10) -#define KDB_TOOMANYDBREGS (-11) -#define KDB_DUPBPT (-12) -#define KDB_BPTNOTFOUND (-13) -#define KDB_BADMODE (-14) -#define KDB_BADINT (-15) -#define KDB_INVADDRFMT (-16) -#define KDB_BADREG (-17) -#define KDB_BADCPUNUM (-18) -#define KDB_BADLENGTH (-19) -#define KDB_NOBP (-20) -#define KDB_BADADDR (-21) - /* Kernel Debugger Command codes. Must not overlap with error codes. */ #define KDB_CMD_GO (-1001) #define KDB_CMD_CPU (-1002) @@ -93,17 +70,6 @@ */ #define KDB_MAXBPT 16 -/* Maximum number of arguments to a function */ -#define KDB_MAXARGS 16 - -typedef enum { - KDB_REPEAT_NONE = 0, /* Do not repeat this command */ - KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ - KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ -} kdb_repeat_t; - -typedef int (*kdb_func_t)(int, const char **); - /* Symbol table format returned by kallsyms. */ typedef struct __ksymtab { unsigned long value; /* Address of symbol */ @@ -123,11 +89,6 @@ extern int kallsyms_symbol_next(char *prefix_name, int flag); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ -extern int kdb_register(char *, kdb_func_t, char *, char *, short); -extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, - short, kdb_repeat_t); -extern int kdb_unregister(char *); - extern int kdb_getarea_size(void *, unsigned long, size_t); extern int kdb_putarea_size(unsigned long, void *, size_t); @@ -144,6 +105,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t); extern int kdb_putword(unsigned long, unsigned long, size_t); extern int kdbgetularg(const char *, unsigned long *); +extern int kdbgetu64arg(const char *, u64 *); extern char *kdbgetenv(const char *); extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, long *, char **); @@ -255,14 +217,6 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); -#ifdef CONFIG_KALLSYMS -extern const char *kdb_walk_kallsyms(loff_t *pos); -#else /* ! CONFIG_KALLSYMS */ -static inline const char *kdb_walk_kallsyms(loff_t *pos) -{ - return NULL; -} -#endif /* ! 
CONFIG_KALLSYMS */ extern char *kdb_getstr(char *, size_t, char *); /* Defines for kdb_symbol_print */ diff --git a/kernel/early_res.c b/kernel/early_res.c deleted file mode 100644 index 7bfae887f211..000000000000 --- a/kernel/early_res.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/mm.h> -#include <linux/early_res.h> -#include <linux/slab.h> -#include <linux/kmemleak.h> - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? */ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. - */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. 
- */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? 
*/ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = get_max_mapped(); - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. - */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - if (start == end) - return; - - if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) - return; - -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? 
*/ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; - int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; - end = get_max_mapped(); -#ifdef MAX_DMA32_PFN - if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = clean_sort_range(range, count); - - /* need to clear it ? 
*/ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. 
- * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index f83972b16564..9bd0934f6c33 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -561,6 +561,7 @@ static ssize_t reset_read(struct file *file, char __user *addr, size_t len, static const struct file_operations gcov_reset_fops = { .write = reset_write, .read = reset_read, + .llseek = noop_llseek, }; /* diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ec4210c6501e..56a891914273 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2000,6 +2000,7 @@ static ssize_t write_enabled_file_bool(struct file *file, static const struct file_operations fops_kp = { .read = read_enabled_file_bool, .write = write_enabled_file_bool, + .llseek = default_llseek, }; static int __kprobes debugfs_kprobe_init(void) diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 645e541a45f6..c7a8f453919e 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -110,6 +110,7 @@ static const struct file_operations pm_qos_power_fops = { .write = pm_qos_power_write, .open = pm_qos_power_open, .release = pm_qos_power_release, + .llseek = noop_llseek, }; /* unlocked internal variant */ @@ -398,7 +399,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, } else return -EINVAL; - pm_qos_req = (struct pm_qos_request_list *)filp->private_data; + pm_qos_req = filp->private_data; pm_qos_update_request(pm_qos_req, value); return count; diff --git a/kernel/profile.c b/kernel/profile.c index b22a899934cc..66f841b7fbd3 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -555,6 +555,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, static const struct file_operations proc_profile_operations = { .read = read_profile, .write = write_profile, + .llseek = default_llseek, }; #ifdef CONFIG_SMP diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index a56f629b057a..66cb89bc5ef1 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -76,7 +76,9 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) } if (!lockwakeup && td->bkl == 4) { +#ifdef CONFIG_LOCK_KERNEL unlock_kernel(); +#endif td->bkl = 0; } return 0; @@ -133,14 +135,18 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) if (td->bkl) return 0; td->bkl = 1; +#ifdef CONFIG_LOCK_KERNEL lock_kernel(); +#endif td->bkl = 4; return 0; case RTTEST_UNLOCKBKL: if (td->bkl != 4) break; +#ifdef CONFIG_LOCK_KERNEL unlock_kernel(); +#endif td->bkl = 0; return 0; diff --git a/kernel/softirq.c b/kernel/softirq.c 
index fc978889b194..f02a9dfa19bc 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -67,7 +67,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -void wakeup_softirqd(void) +static void wakeup_softirqd(void) { /* Interrupts are disabled: no need to stop preemption */ struct task_struct *tsk = __get_cpu_var(ksoftirqd); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bad369ec5403..c782fe9924c7 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -50,6 +50,7 @@ cond_syscall(compat_sys_sendmsg); cond_syscall(sys_recvmsg); cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); +cond_syscall(compat_sys_recv); cond_syscall(compat_sys_recvfrom); cond_syscall(compat_sys_recvmmsg); cond_syscall(sys_socketcall); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 959f8d6c8cc1..bc251ed66724 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -23,7 +23,6 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/debugfs.h> -#include <linux/smp_lock.h> #include <linux/time.h> #include <linux/uaccess.h> @@ -326,6 +325,7 @@ static const struct file_operations blk_dropped_fops = { .owner = THIS_MODULE, .open = blk_dropped_open, .read = blk_dropped_read, + .llseek = default_llseek, }; static int blk_msg_open(struct inode *inode, struct file *filp) @@ -365,6 +365,7 @@ static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, .open = blk_msg_open, .write = blk_msg_write, + .llseek = noop_llseek, }; /* @@ -639,7 +640,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO; - lock_kernel(); mutex_lock(&bdev->bd_mutex); switch (cmd) { @@ -667,7 +667,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) } mutex_unlock(&bdev->bd_mutex); - unlock_kernel(); return ret; } @@ -1652,10 +1651,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct block_device *bdev; ssize_t ret = -ENXIO; - lock_kernel(); bdev = bdget(part_devt(p)); if (bdev == NULL) - goto out_unlock_kernel; + goto out; q = blk_trace_get_queue(bdev); if (q == NULL) @@ -1683,8 +1681,7 @@ out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); -out_unlock_kernel: - unlock_kernel(); +out: return ret; } @@ -1714,11 +1711,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ret = -ENXIO; - lock_kernel(); p = dev_to_part(dev); bdev = bdget(part_devt(p)); if (bdev == NULL) - goto out_unlock_kernel; + goto out; q = blk_trace_get_queue(bdev); if (q == NULL) @@ -1753,8 +1749,6 @@ out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); -out_unlock_kernel: - unlock_kernel(); out: return ret ? 
ret : count; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ebd80d50c474..f3dadae83883 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -800,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = { .open = tracing_open_generic, .read = ftrace_profile_read, .write = ftrace_profile_write, + .llseek = default_llseek, }; /* used to initialize the real stat files */ @@ -2669,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = { .read = seq_read, .write = ftrace_graph_write, .release = ftrace_graph_release, + .llseek = seq_lseek, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c5a632a669e1..c3dab054d18e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3974,6 +3974,7 @@ static const struct file_operations rb_simple_fops = { .open = tracing_open_generic, .read = rb_simple_read, .write = rb_simple_write, + .llseek = default_llseek, }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 398c0e8b332c..0725eeab1937 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -932,6 +932,7 @@ static const struct file_operations ftrace_enable_fops = { .open = tracing_open_generic, .read = event_enable_read, .write = event_enable_write, + .llseek = default_llseek, }; static const struct file_operations ftrace_event_format_fops = { @@ -944,29 +945,34 @@ static const struct file_operations ftrace_event_format_fops = { static const struct file_operations ftrace_event_id_fops = { .open = tracing_open_generic, .read = event_id_read, + .llseek = default_llseek, }; static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_generic, .read = event_filter_read, .write = event_filter_write, + .llseek = default_llseek, }; static const struct file_operations ftrace_subsystem_filter_fops = { .open = tracing_open_generic, .read = subsystem_filter_read, .write = subsystem_filter_write, + .llseek = default_llseek, }; static const struct file_operations ftrace_system_enable_fops = { .open = tracing_open_generic, .read = system_enable_read, .write = system_enable_write, + .llseek = default_llseek, }; static const struct file_operations ftrace_show_header_fops = { .open = tracing_open_generic, .read = show_header, + .llseek = default_llseek, }; static struct dentry *event_trace_events_dir(void) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 7b8ecd751d93..3c5c5dfea0b3 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -13,7 +13,6 @@ #include <linux/kdb.h> #include <linux/ftrace.h> -#include "../debug/kdb/kdb_private.h" #include "trace.h" #include "trace_output.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 544301d29dee..b8d2852baa4a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -648,7 +648,7 @@ static int register_trace_probe(struct trace_probe *tp) } ret = register_probe_event(tp); if (ret) { - pr_warning("Faild to register probe event(%d)\n", ret); + pr_warning("Failed to register probe event(%d)\n", ret); goto end; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index a6b7e0e0f3eb..4c5dead0c239 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = { .open = tracing_open_generic, .read = stack_max_size_read, .write = stack_max_size_write, + .llseek = 
default_llseek, }; static void * diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f77afd939229..e5ff2cbaadc2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -42,9 +42,6 @@ #include <linux/lockdep.h> #include <linux/idr.h> -#define CREATE_TRACE_POINTS -#include <trace/events/workqueue.h> - #include "workqueue_sched.h" enum { @@ -257,6 +254,9 @@ EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); +#define CREATE_TRACE_POINTS +#include <trace/events/workqueue.h> + #define for_each_busy_worker(worker, i, pos, gcwq) \ for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) @@ -310,21 +310,6 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_NONE; \ (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) -#ifdef CONFIG_LOCKDEP -/** - * in_workqueue_context() - in context of specified workqueue? - * @wq: the workqueue of interest - * - * Checks lockdep state to see if the current task is executing from - * within a workqueue item. This function exists only if lockdep is - * enabled. - */ -int in_workqueue_context(struct workqueue_struct *wq) -{ - return lock_is_held(&wq->lockdep_map); -} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; @@ -604,7 +589,9 @@ static bool keep_working(struct global_cwq *gcwq) { atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); - return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; + return !list_empty(&gcwq->worklist) && + (atomic_read(nr_running) <= 1 || + gcwq->flags & GCWQ_HIGHPRI_PENDING); } /* Do we need a new worker? Called from manager. */ @@ -997,6 +984,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, /* gcwq determined, get cwq and queue */ cwq = get_cwq(gcwq->cpu, wq); + trace_workqueue_queue_work(cpu, cwq, work); BUG_ON(!list_empty(&work->entry)); @@ -1004,6 +992,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { + trace_workqueue_activate_work(work); cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); } else { @@ -1679,6 +1668,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct work_struct, entry); struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); + trace_workqueue_activate_work(work); move_linked_works(work, pos, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; @@ -2326,27 +2316,17 @@ out_unlock: } EXPORT_SYMBOL_GPL(flush_workqueue); -/** - * flush_work - block until a work_struct's callback has terminated - * @work: the work which is to be flushed - * - * Returns false if @work has already terminated. - * - * It is expected that, prior to calling flush_work(), the caller has - * arranged for the work to not be requeued, otherwise it doesn't make - * sense to use this function. 
- */ -int flush_work(struct work_struct *work) +static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, + bool wait_executing) { struct worker *worker = NULL; struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; - struct wq_barrier barr; might_sleep(); gcwq = get_work_gcwq(work); if (!gcwq) - return 0; + return false; spin_lock_irq(&gcwq->lock); if (!list_empty(&work->entry)) { @@ -2359,28 +2339,127 @@ int flush_work(struct work_struct *work) cwq = get_work_cwq(work); if (unlikely(!cwq || gcwq != cwq->gcwq)) goto already_gone; - } else { + } else if (wait_executing) { worker = find_worker_executing_work(gcwq, work); if (!worker) goto already_gone; cwq = worker->current_cwq; - } + } else + goto already_gone; - insert_wq_barrier(cwq, &barr, work, worker); + insert_wq_barrier(cwq, barr, work, worker); spin_unlock_irq(&gcwq->lock); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_release(&cwq->wq->lockdep_map); - - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); - return 1; + return true; already_gone: spin_unlock_irq(&gcwq->lock); - return 0; + return false; +} + +/** + * flush_work - wait for a work to finish executing the last queueing instance + * @work: the work to flush + * + * Wait until @work has finished execution. This function considers + * only the last queueing instance of @work. If @work has been + * enqueued across different CPUs on a non-reentrant workqueue or on + * multiple workqueues, @work might still be executing on return on + * some of the CPUs from earlier queueing. + * + * If @work was queued only on a non-reentrant, ordered or unbound + * workqueue, @work is guaranteed to be idle on return if it hasn't + * been requeued since flush started. + * + * RETURNS: + * %true if flush_work() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_work(struct work_struct *work) +{ + struct wq_barrier barr; + + if (start_flush_work(work, &barr, true)) { + wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + return true; + } else + return false; } EXPORT_SYMBOL_GPL(flush_work); +static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) +{ + struct wq_barrier barr; + struct worker *worker; + + spin_lock_irq(&gcwq->lock); + + worker = find_worker_executing_work(gcwq, work); + if (unlikely(worker)) + insert_wq_barrier(worker->current_cwq, &barr, work, worker); + + spin_unlock_irq(&gcwq->lock); + + if (unlikely(worker)) { + wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + return true; + } else + return false; +} + +static bool wait_on_work(struct work_struct *work) +{ + bool ret = false; + int cpu; + + might_sleep(); + + lock_map_acquire(&work->lockdep_map); + lock_map_release(&work->lockdep_map); + + for_each_gcwq_cpu(cpu) + ret |= wait_on_cpu_work(get_gcwq(cpu), work); + return ret; +} + +/** + * flush_work_sync - wait until a work has finished execution + * @work: the work to flush + * + * Wait until @work has finished execution. On return, it's + * guaranteed that all queueing instances of @work which happened + * before this function is called are finished. In other words, if + * @work hasn't been requeued since this function was called, @work is + * guaranteed to be idle on return. + * + * RETURNS: + * %true if flush_work_sync() waited for the work to finish execution, + * %false if it was already idle. 
+ */ +bool flush_work_sync(struct work_struct *work) +{ + struct wq_barrier barr; + bool pending, waited; + + /* we'll wait for executions separately, queue barr only if pending */ + pending = start_flush_work(work, &barr, false); + + /* wait for executions to finish */ + waited = wait_on_work(work); + + /* wait for the pending one */ + if (pending) { + wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } + + return pending || waited; +} +EXPORT_SYMBOL_GPL(flush_work_sync); + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. @@ -2423,39 +2502,7 @@ static int try_to_grab_pending(struct work_struct *work) return ret; } -static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) -{ - struct wq_barrier barr; - struct worker *worker; - - spin_lock_irq(&gcwq->lock); - - worker = find_worker_executing_work(gcwq, work); - if (unlikely(worker)) - insert_wq_barrier(worker->current_cwq, &barr, work, worker); - - spin_unlock_irq(&gcwq->lock); - - if (unlikely(worker)) { - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); - } -} - -static void wait_on_work(struct work_struct *work) -{ - int cpu; - - might_sleep(); - - lock_map_acquire(&work->lockdep_map); - lock_map_release(&work->lockdep_map); - - for_each_gcwq_cpu(cpu) - wait_on_cpu_work(get_gcwq(cpu), work); -} - -static int __cancel_work_timer(struct work_struct *work, +static bool __cancel_work_timer(struct work_struct *work, struct timer_list* timer) { int ret; @@ -2472,42 +2519,81 @@ static int __cancel_work_timer(struct work_struct *work, } /** - * cancel_work_sync - block until a work_struct's callback has terminated - * @work: the work which is to be flushed - * - * Returns true if @work was pending. + * cancel_work_sync - cancel a work and wait for it to finish + * @work: the work to cancel * - * cancel_work_sync() will cancel the work if it is queued. If the work's - * callback appears to be running, cancel_work_sync() will block until it - * has completed. + * Cancel @work and wait for its execution to finish. This function + * can be used even if the work re-queues itself or migrates to + * another workqueue. On return from this function, @work is + * guaranteed to be not pending or executing on any CPU. * - * It is possible to use this function if the work re-queues itself. It can - * cancel the work even if it migrates to another workqueue, however in that - * case it only guarantees that work->func() has completed on the last queued - * workqueue. + * cancel_work_sync(&delayed_work->work) must not be used for + * delayed_work's. Use cancel_delayed_work_sync() instead. * - * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not - * pending, otherwise it goes into a busy-wait loop until the timer expires. - * - * The caller must ensure that workqueue_struct on which this work was last + * The caller must ensure that the workqueue on which @work was last * queued can't be destroyed before this function returns. + * + * RETURNS: + * %true if @work was pending, %false otherwise. */ -int cancel_work_sync(struct work_struct *work) +bool cancel_work_sync(struct work_struct *work) { return __cancel_work_timer(work, NULL); } EXPORT_SYMBOL_GPL(cancel_work_sync); /** - * cancel_delayed_work_sync - reliably kill off a delayed work. 
- * @dwork: the delayed work struct + * flush_delayed_work - wait for a dwork to finish executing the last queueing + * @dwork: the delayed work to flush + * + * Delayed timer is cancelled and the pending work is queued for + * immediate execution. Like flush_work(), this function only + * considers the last queueing instance of @dwork. + * + * RETURNS: + * %true if flush_work() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_delayed_work(struct delayed_work *dwork) +{ + if (del_timer_sync(&dwork->timer)) + __queue_work(raw_smp_processor_id(), + get_work_cwq(&dwork->work)->wq, &dwork->work); + return flush_work(&dwork->work); +} +EXPORT_SYMBOL(flush_delayed_work); + +/** + * flush_delayed_work_sync - wait for a dwork to finish + * @dwork: the delayed work to flush * - * Returns true if @dwork was pending. + * Delayed timer is cancelled and the pending work is queued for + * execution immediately. Other than timer handling, its behavior + * is identical to flush_work_sync(). * - * It is possible to use this function if @dwork rearms itself via queue_work() - * or queue_delayed_work(). See also the comment for cancel_work_sync(). + * RETURNS: + * %true if flush_work_sync() waited for the work to finish execution, + * %false if it was already idle. */ -int cancel_delayed_work_sync(struct delayed_work *dwork) +bool flush_delayed_work_sync(struct delayed_work *dwork) +{ + if (del_timer_sync(&dwork->timer)) + __queue_work(raw_smp_processor_id(), + get_work_cwq(&dwork->work)->wq, &dwork->work); + return flush_work_sync(&dwork->work); +} +EXPORT_SYMBOL(flush_delayed_work_sync); + +/** + * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish + * @dwork: the delayed work cancel + * + * This is cancel_work_sync() for delayed works. + * + * RETURNS: + * %true if @dwork was pending, %false otherwise. + */ +bool cancel_delayed_work_sync(struct delayed_work *dwork) { return __cancel_work_timer(&dwork->work, &dwork->timer); } @@ -2559,23 +2645,6 @@ int schedule_delayed_work(struct delayed_work *dwork, EXPORT_SYMBOL(schedule_delayed_work); /** - * flush_delayed_work - block until a dwork_struct's callback has terminated - * @dwork: the delayed work which is to be flushed - * - * Any timeout is cancelled, and any pending work is run immediately. - */ -void flush_delayed_work(struct delayed_work *dwork) -{ - if (del_timer_sync(&dwork->timer)) { - __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, - &dwork->work); - put_cpu(); - } - flush_work(&dwork->work); -} -EXPORT_SYMBOL(flush_delayed_work); - -/** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done @@ -2592,13 +2661,15 @@ int schedule_delayed_work_on(int cpu, EXPORT_SYMBOL(schedule_delayed_work_on); /** - * schedule_on_each_cpu - call a function on each online CPU from keventd + * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call * - * Returns zero on success. - * Returns -ve errno on failure. - * + * schedule_on_each_cpu() executes @func on each online CPU using the + * system workqueue and blocks until all CPUs have completed. * schedule_on_each_cpu() is very slow. + * + * RETURNS: + * 0 on success, -errno on failure. 
*/ int schedule_on_each_cpu(work_func_t func) { @@ -2720,7 +2791,9 @@ static int alloc_cwqs(struct workqueue_struct *wq) } } - /* just in case, make sure it's actually aligned */ + /* just in case, make sure it's actually aligned + * - this is affected by PERCPU() alignment in vmlinux.lds.S + */ BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); return wq->cpu_wq.v ? 0 : -ENOMEM; } @@ -2764,6 +2837,13 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, unsigned int cpu; /* + * Workqueues which may be used during memory reclaim should + * have a rescuer to guarantee forward progress. + */ + if (flags & WQ_MEM_RECLAIM) + flags |= WQ_RESCUER; + + /* * Unbound workqueues aren't concurrency managed and should be * dispatched to workers immediately. */ |
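A recurring change in this diff is adding an explicit .llseek method (default_llseek or noop_llseek) to file_operations structures that previously left it unset. The sketch below is a hypothetical debugfs-style attribute (not part of this patch) showing that pattern; only the .llseek line reflects the convention introduced here.

/*
 * Hypothetical read-only attribute illustrating the .llseek pattern
 * applied throughout this diff: fops that only implement read/write
 * now name their llseek behaviour explicitly.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/uaccess.h>

static const char example_buf[] = "hello\n";

static ssize_t example_read(struct file *file, char __user *ubuf,
			    size_t count, loff_t *ppos)
{
	/* copy from a fixed kernel buffer, honouring *ppos */
	return simple_read_from_buffer(ubuf, count, ppos, example_buf,
				       sizeof(example_buf) - 1);
}

static const struct file_operations example_fops = {
	.owner	= THIS_MODULE,
	.read	= example_read,
	.llseek	= default_llseek,	/* explicit, as in the fops touched above */
};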
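The workqueue portion of the diff introduces flush_work_sync()/flush_delayed_work_sync() and tightens the kernel-doc for cancel_work_sync()/cancel_delayed_work_sync(). A minimal usage sketch follows, assuming a hypothetical driver structure (my_dev, my_dev_teardown are illustrative names, not from this patch); the calls mirror the semantics documented in the kernel-doc added above.

#include <linux/workqueue.h>

struct my_dev {
	struct work_struct	work;	/* regular work item */
	struct delayed_work	dwork;	/* timer-deferred work item */
};

static void my_dev_teardown(struct my_dev *dev)
{
	/*
	 * Wait until every queueing instance of dev->work that happened
	 * before this call has finished executing (flush_work() would
	 * only wait for the last queueing instance).
	 */
	flush_work_sync(&dev->work);

	/*
	 * Cancel the delayed work and wait for any running instance;
	 * on return it is neither pending nor executing on any CPU.
	 */
	cancel_delayed_work_sync(&dev->dwork);
}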
