From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/ds.c | 953 ++++++++++++++++++++++++++++++------------- arch/x86/kernel/process_32.c | 25 +- arch/x86/kernel/process_64.c | 25 +- arch/x86/kernel/ptrace.c | 444 ++++++++++++-------- arch/x86/kernel/setup_64.c | 3 +- 6 files changed, 991 insertions(+), 462 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c51d37..cbffa2a25a13 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..5b32b6d062b4 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,26 +2,48 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ + +#ifdef CONFIG_X86_DS + #include #include #include #include +#include + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the size of the DS structure in bytes */ + unsigned char sizeof_ds; + /* the size of one pointer-typed field in the DS structure in bytes; + this covers the first 8 fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; +}; +static struct ds_configuration ds_cfg; /* @@ -44,378 +66,747 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. - * - * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, +}; -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The configuration for a particular DS/BTS hardware implementation. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; -}; +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * The global configuration used by the below accessor functions + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. */ -static struct ds_configuration ds_cfg; +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * The below functions to get and put tracers and to check the + * allocation type require the ds_lock to be held by the caller. + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline unsigned long get_bts_buffer_base(char *base) +static long tracers; + +static inline void get_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); + tracers += (task ? 1 : -1); } -static inline void set_bts_buffer_base(char *base, unsigned long value) + +static inline void put_tracer(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; + tracers -= (task ? 1 : -1); } -static inline unsigned long get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_index.offset); + return (task ? (tracers >= 0) : (tracers <= 0)); } -static inline void set_bts_index(char *base, unsigned long value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. + */ +static DEFINE_PER_CPU(struct ds_context *, system_context); + +#define this_system_context per_cpu(system_context, smp_processor_id()) + +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context *context; + + spin_lock(&ds_lock); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock(&ds_lock); + + return context; } -static inline unsigned long get_bts_absolute_maximum(char *base) + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + * + * pre: requires ds_lock to be held + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &this_system_context); + struct ds_context *context = *p_context; + + if (!context) { + context = kzalloc(sizeof(*context), GFP_KERNEL); + + if (!context) + return 0; + + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return 0; + } + + *p_context = context; + + context->this = p_context; + context->task = task; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + + get_tracer(task); + } + + context->count++; + + return context; } -static inline void set_bts_absolute_maximum(char *base, unsigned long value) + +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ +static inline void ds_put_context(struct ds_context *context) { - (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + if (!context) + return; + + spin_lock(&ds_lock); + + if (--context->count) + goto out; + + *(context->this) = 0; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); + kfree(context); + out: + spin_unlock(&ds_lock); } -static inline unsigned long get_bts_interrupt_threshold(char *base) + + +/* + * Handle a buffer overflow + * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static inline void set_bts_interrupt_threshold(char *base, unsigned long value) + + +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + unsigned long rlim, vm, pgsz; + void *buffer; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return 0; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return 0; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return 0; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static inline unsigned long get_from_ip(char *base) + +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.from_ip.offset); + struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); + int error = 0; + + if (!ds_cfg.sizeof_ds) + return -EOPNOTSUPP; + + /* we require some space to do alignment adjustments below */ + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; + + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; + + + spin_lock(&ds_lock); + + if (!check_tracer(task)) + return -EPERM; + + error = -ENOMEM; + context = ds_alloc_context(task); + if (!context) + goto out_unlock; + + error = -EALREADY; + if (context->owner[qual] == current) + goto out_unlock; + error = -EPERM; + if (context->owner[qual] != 0) + goto out_unlock; + context->owner[qual] = current; + + spin_unlock(&ds_lock); + + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; + + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = 0; + ds_put_context(context); + return error; + + out_unlock: + spin_unlock(&ds_lock); + ds_put_context(context); + return error; } -static inline void set_from_ip(char *base, unsigned long value) + +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; + return ds_request(task, base, size, ovfl, ds_bts); } -static inline unsigned long get_to_ip(char *base) + +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - return *(unsigned long *)(base + ds_cfg.to_ip.offset); + return ds_request(task, base, size, ovfl, ds_pebs); } -static inline void set_to_ip(char *base, unsigned long value) + +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; + struct ds_context *context; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + kfree(context->buffer[qual]); + context->buffer[qual] = 0; + + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = 0; + + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); + out: + ds_put_context(context); + return error; } -static inline unsigned char get_info_type(char *base) + +int ds_release_bts(struct task_struct *task) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + return ds_release(task, ds_bts); } -static inline void set_info_type(char *base, unsigned char value) + +int ds_release_pebs(struct task_struct *task) { - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + return ds_release(task, ds_pebs); } -static inline unsigned long get_info_data(char *base) + +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -static inline void set_info_data(char *base, unsigned long value) + +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; + return ds_get_index(task, pos, ds_bts); } +int ds_get_pebs_index(struct task_struct *task, size_t *pos) +{ + return ds_get_index(task, pos, ds_pebs); +} -int ds_allocate(void **dsp, size_t bts_size_in_bytes) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - size_t bts_size_in_records; - unsigned long bts; - void *ds; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); + + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +int ds_get_bts_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_bts); +} - if (bts_size_in_bytes < 0) - return -EINVAL; +int ds_get_pebs_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_pebs); +} - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) +{ + struct ds_context *context; + unsigned long base, idx; + int error; - if (bts_size_in_bytes <= 0) + if (!record) return -EINVAL; - bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (!bts) - return -ENOMEM; + base = ds_get(context->ds, qual, ds_buffer_base); + idx = base + (index * ds_cfg.sizeof_rec[qual]); - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + error = -EINVAL; + if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) + goto out; - if (!ds) { - kfree((void *)bts); - return -ENOMEM; - } - - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); - - *dsp = ds; - return 0; + *record = (const void *)idx; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_free(void **dsp) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (*dsp) { - kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - } - return 0; + return ds_access(task, index, record, ds_bts); } -int ds_get_bts_size(void *ds) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - int size_in_bytes; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (!ds) - return 0; - - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + return ds_access(task, index, record, ds_pebs); } -int ds_get_bts_end(void *ds) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int size_in_bytes = ds_get_bts_size(ds); - - if (size_in_bytes <= 0) - return size_in_bytes; + struct ds_context *context; + int error; - return size_in_bytes / ds_cfg.sizeof_bts; -} + if (!record) + return -EINVAL; -int ds_get_bts_index(void *ds) -{ - int index_offset_in_bytes; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + error = 0; + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + goto out; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + error += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(task, context, qual); + } - return index_offset_in_bytes / ds_cfg.sizeof_bts; + out: + ds_put_context(context); + return error; } -int ds_set_overflow(void *ds, int method) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + return ds_write(task, record, size, ds_bts, /* force = */ 0); } -int ds_get_overflow(void *ds) +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) { - return DS_O_WRAP; + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_clear(void *ds) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - int bts_size = ds_get_bts_size(ds); - unsigned long bts_base; - - if (bts_size <= 0) - return bts_size; - - bts_base = get_bts_buffer_base(ds); - memset((void *)bts_base, 0, bts_size); - - set_bts_index(ds, bts_base); - return 0; + return ds_write(task, record, size, ds_bts, /* force = */ 1); } -int ds_read_bts(void *ds, int index, struct bts_struct *out) +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) { - void *bts; + return ds_write(task, record, size, ds_pebs, /* force = */ 1); +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) +{ + struct ds_context *context; + unsigned long base, end; + int error; - if (index < 0) - return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (index >= ds_get_bts_size(ds)) - return -EINVAL; + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); - bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); + if (clear) + memset((void *)base, 0, end - base); - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(context->ds, qual, ds_index, base); - return sizeof(*out);; + error = 0; + out: + ds_put_context(context); + return error; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_bts(struct task_struct *task) { - unsigned long bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); +} - bts = get_bts_index(ds); +int ds_reset_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); +} - memset((void *)bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; +int ds_clear_bts(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); +} - case BTS_BRANCH: - set_from_ip((void *)bts, in->variant.lbr.from_ip); - set_to_ip((void *)bts, in->variant.lbr.to_ip); - break; +int ds_clear_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); +} - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); - set_info_type((void *)bts, in->qualifier); - set_info_data((void *)bts, in->variant.jiffies); - break; +int ds_get_pebs_reset(struct task_struct *task, u64 *value) +{ + struct ds_context *context; + int error; - default: + if (!value) return -EINVAL; - } - bts = bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return ds_cfg.sizeof_bts; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -unsigned long ds_debugctl_mask(void) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - return ds_cfg.debugctl_mask; -} + struct ds_context *context; + int error; -#ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) -}; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; -static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + + error = 0; + out: + ds_put_context(context); + return error; +} + +static const struct ds_configuration ds_cfg_var = { + .sizeof_ds = sizeof(long) * 12, + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, + .sizeof_rec[ds_pebs] = sizeof(long) * 10 }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_64 = { + .sizeof_ds = 8 * 12, + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 10 }; static inline void @@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0x1C: /* Atom */ + ds_configure(&ds_cfg_64); break; default: /* sorry, don't know about them */ @@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xF: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +void ds_free(struct ds_context *context) +{ + /* This is called when the task owning the parameter context + * is dying. There should not be any user of that context left + * to disturb us, anymore. */ + unsigned long leftovers = context->count; + while (leftovers--) + ds_put_context(context); +} +#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..5cec8a75a4e8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -316,6 +316,14 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(current->thread.ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(current->thread.ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread; next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..ad213494a22f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -267,6 +267,14 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(t->ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(t->ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + wrmsrl(MSR_IA32_DS_AREA, ds_next); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef380f0e..b7ff783dc5fe 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS +/* + * The configuration for a particular BTS hardware implementation. + */ +struct bts_configuration { + /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ + unsigned char sizeof_bts; + /* the size of a field in the BTS record in bytes */ + unsigned char sizeof_field; + /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ + unsigned long debugctl_mask; +}; +static struct bts_configuration bts_cfg; + +#define BTS_MAX_RECORD_SIZE (8 * 3) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + */ -static int ptrace_bts_get_size(struct task_struct *child) +enum bts_field { + bts_from = 0, + bts_to, + bts_flags, + + bts_escape = (unsigned long)-1, + bts_qual = bts_to, + bts_jiffies = bts_flags +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) { - if (!child->thread.ds_area_msr) - return -ENXIO; + base += (bts_cfg.sizeof_field * field); + return *(unsigned long *)base; +} - return ds_get_bts_index((void *)child->thread.ds_area_msr); +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (bts_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; } -static int ptrace_bts_read_record(struct task_struct *child, - long index, +/* + * Translate a BTS record from the raw format into the bts_struct format + * + * out (out): bts_struct interpretation + * raw: raw BTS record + */ +static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) +{ + memset(out, 0, sizeof(*out)); + if (bts_get(raw, bts_from) == bts_escape) { + out->qualifier = bts_get(raw, bts_qual); + out->variant.jiffies = bts_get(raw, bts_jiffies); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = bts_get(raw, bts_from); + out->variant.lbr.to_ip = bts_get(raw, bts_to); + } +} + +static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; + const void *bts_record; + size_t bts_index, bts_end; + int error; - if (index < 0) - return -EINVAL; + error = ds_get_bts_end(child, &bts_end); + if (error < 0) + return error; - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); if (bts_end <= index) return -EINVAL; + error = ds_get_bts_index(child, &bts_index); + if (error < 0) + return error; + /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + bts_index += bts_end - (index + 1); + if (bts_end <= bts_index) + bts_index -= bts_end; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; + error = ds_access_bts(child, bts_index, &bts_record); + if (error < 0) + return error; + + ptrace_bts_translate_record(&ret, bts_record); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; @@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child, return sizeof(ret); } -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; - - return ds_clear((void *)child->thread.ds_area_msr); -} - static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; + struct bts_struct ret; + const unsigned char *raw; + size_t end, i; + int error; - end = ds_get_bts_index(ds); - if (end <= 0) - return end; + error = ds_get_bts_index(child, &end); + if (error < 0) + return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + error = ds_access_bts(child, 0, (const void **)&raw); + if (error < 0) + return error; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; + for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { + ptrace_bts_translate_record(&ret, raw); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; } - ds_clear(ds); + error = ds_clear_bts(child); + if (error < 0) + return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; + int error = 0; + + error = -EOPNOTSUPP; + if (!bts_cfg.sizeof_bts) + goto errout; + error = -EIO; if (cfg_size < sizeof(cfg)) - return -EIO; + goto errout; + error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + goto errout; - if ((int)cfg.size < 0) - return -EINVAL; + error = -EINVAL; + if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && + !(cfg.flags & PTRACE_BTS_O_ALLOC)) + goto errout; - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; - } - cfg.size = PAGE_ALIGN(cfg.size); + if (cfg.flags & PTRACE_BTS_O_ALLOC) { + ds_ovfl_callback_t ovfl = 0; + unsigned int sig = 0; - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); + + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + goto errout; + + sig = cfg.signal; + ovfl = ptrace_bts_ovfl; + } + + error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + if (error < 0) goto errout; - ds = (void *)child->thread.ds_area_msr; + child->thread.bts_ovfl_signal = sig; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) + error = -EINVAL; + if (!child->thread.ds_ctx && cfg.flags) goto errout; if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); + child->thread.debugctlmsr |= bts_cfg.debugctl_mask; else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (cfg.flags & PTRACE_BTS_O_SCHED) set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); else clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - ret = sizeof(cfg); + error = sizeof(cfg); out: if (child->thread.debugctlmsr) @@ -702,10 +777,10 @@ out: else clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; + return error; errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); goto out; } @@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { - void *ds = (void *)child->thread.ds_area_msr; struct ptrace_bts_config cfg; + size_t end; + const void *base, *max; + int error; if (cfg_size < sizeof(cfg)) return -EIO; - memset(&cfg, 0, sizeof(cfg)); + error = ds_get_bts_end(child, &end); + if (error < 0) + return error; - if (ds) { - cfg.size = ds_get_bts_size(ds); + error = ds_access_bts(child, /* index = */ 0, &base); + if (error < 0) + return error; - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; + error = ds_access_bts(child, /* index = */ end, &max); + if (error < 0) + return error; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; + memset(&cfg, 0, sizeof(cfg)); + cfg.size = (max - base); + cfg.signal = child->thread.bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; - } + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; - cfg.bts_size = sizeof(struct bts_struct); + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & bts_cfg.debugctl_mask) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) return -EFAULT; @@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } - static int ptrace_bts_write_record(struct task_struct *child, const struct bts_struct *in) { - int retval; + unsigned char bts_record[BTS_MAX_RECORD_SIZE]; - if (!child->thread.ds_area_msr) - return -ENXIO; + BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + memset(bts_record, 0, bts_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; - return sizeof(*in); -} + case BTS_BRANCH: + bts_set(bts_record, bts_from, in->variant.lbr.from_ip); + bts_set(bts_record, bts_to, in->variant.lbr.to_ip); + break; -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) -{ - unsigned long rlim, vm; - int ret, old_size; + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + bts_set(bts_record, bts_from, bts_escape); + bts_set(bts_record, bts_qual, in->qualifier); + bts_set(bts_record, bts_jiffies, in->variant.jiffies); + break; - if (size < 0) + default: return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; } - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; - } - - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, ptrace_bts_write_record(tsk, &rec); } -#endif /* X86_BTS */ + +static const struct bts_configuration bts_cfg_netburst = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<2)|(1<<3)|(1<<5) +}; + +static const struct bts_configuration bts_cfg_pentium_m = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<6)|(1<<7) +}; + +static const struct bts_configuration bts_cfg_core2 = { + .sizeof_bts = 8 * 3, + .sizeof_field = 8, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void bts_configure(const struct bts_configuration *cfg) +{ + bts_cfg = *cfg; +} + +void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0xD: + case 0xE: /* Pentium M */ + bts_configure(&bts_cfg_pentium_m); + break; + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ + bts_configure(&bts_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + bts_configure(&bts_cfg_netburst); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } +#ifdef CONFIG_X86_PTRACE_BTS + (void)ds_release_bts(child); + + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); +#endif /* CONFIG_X86_PTRACE_BTS */ } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* * These bits need more cooking - not enabled yet: */ -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config (child, data, (struct ptrace_bts_config __user *)addr); @@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); + ret = ds_get_bts_index(child, /* pos = */ 0); break; case PTRACE_BTS_GET: @@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain (child, data, (struct bts_struct __user *) addr); break; -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f2fc8feb727d..f7aebe13c999 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); n = c->extended_cpuid_level; if (n >= 0x80000008) { -- cgit v1.2.3 From 970e725098a6da5a9c1f8128102c812e31a0444c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Apr 2008 16:40:17 -0700 Subject: x86, ptrace: PEBS support, warning fix arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_next' arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_prev' Cc: Markus Metzger Cc: Andi Kleen Cc: "H. Peter Anvin" Cc: "Siddha, Suresh B" Cc: Roland McGrath Cc: Michael Kerrisk Cc: Cc: stephane eranian Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5cec8a75a4e8..496ea3110aa2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -484,20 +484,13 @@ int set_tsc_mode(unsigned int val) return 0; } -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_X86_DS +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) { - struct thread_struct *prev, *next; - unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; + unsigned long ds_prev = 0; + unsigned long ds_next = 0; - prev = &prev_p->thread; - next = &next_p->thread; - - debugctl = prev->debugctlmsr; - -#ifdef CONFIG_X86_DS if (prev->ds_ctx) ds_prev = (unsigned long)prev->ds_ctx->ds; if (next->ds_ctx) @@ -510,8 +503,28 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(0); wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } + return debugctl; +} +#else +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + return debugctl; +} #endif /* CONFIG_X86_DS */ +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long debugctl; + + prev = &prev_p->thread; + next = &next_p->thread; + + debugctl = update_debugctl(prev, next, prev->debugctlmsr); + if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); -- cgit v1.2.3 From 573da4224e8c3800e613d715e909c3179a7e3cb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 28 Apr 2008 23:15:04 +0400 Subject: x86: DS cleanup - dont treat 0 as NULL Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ds.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5b32b6d062b4..24a323c95997 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -238,12 +238,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) - return 0; + return NULL; context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - return 0; + return NULL; } *p_context = context; @@ -279,7 +279,7 @@ static inline void ds_put_context(struct ds_context *context) if (--context->count) goto out; - *(context->this) = 0; + *(context->this) = NULL; if (context->task) clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); @@ -341,16 +341,16 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; vm = current->mm->total_vm + pgsz; if (rlim < vm) - return 0; + return NULL; rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; vm = current->mm->locked_vm + pgsz; if (rlim < vm) - return 0; + return NULL; buffer = kzalloc(size, GFP_KERNEL); if (!buffer) - return 0; + return NULL; current->mm->total_vm += pgsz; current->mm->locked_vm += pgsz; @@ -395,7 +395,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, if (context->owner[qual] == current) goto out_unlock; error = -EPERM; - if (context->owner[qual] != 0) + if (context->owner[qual] != NULL) goto out_unlock; context->owner[qual] = current; @@ -445,7 +445,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return error; out_release: - context->owner[qual] = 0; + context->owner[qual] = NULL; ds_put_context(context); return error; @@ -825,7 +825,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_var); break; case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + case 0x1C: /* Atom */ ds_configure(&ds_cfg_64); break; default: -- cgit v1.2.3 From 34b2cd5b688b012975fcfc3b3970fc3508fa82c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:30:07 +0200 Subject: x86: PEBS cleanup Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad213494a22f..4a93c98a60a2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -500,7 +500,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; @@ -508,17 +507,23 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, debugctl = prev->debugctlmsr; #ifdef CONFIG_X86_DS - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, ds_next); + { + unsigned long ds_prev = 0, ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* + * We clear debugctl to make sure DS + * is not in use when we change it: + */ + debugctl = 0; + update_debugctlmsr(0); + wrmsrl(MSR_IA32_DS_AREA, ds_next); + } } #endif /* CONFIG_X86_DS */ -- cgit v1.2.3 From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a946247..b2f54fafb8bc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..0f7c29a8c4ea 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,6 +189,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; -- cgit v1.2.3 From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: MM: virtual address debug Add some (configurable) expensive sanity checking to catch wrong address translations on x86. - create linux/mmdebug.h file to be able include this file in asm headers to not get unsolvable loops in header files - __phys_addr on x86_32 became a function in ioremap.c since PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constasts are undefined if declared in page_32.h - add __phys_addr_const for initializing doublefault_tss.__cr3 Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2. Contains Andi's enable numa virtual address debug patch. Signed-off-by: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index a47798b59f07..395acb12b0d1 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { .ds = __USER_DS, .fs = __KERNEL_PERCPU, - .__cr3 = __pa(swapper_pg_dir) + .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) } }; -- cgit v1.2.3 From 323ec001c6bb98eeabb5abbdbb8c8055d9496554 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 29 Jun 2008 14:19:31 +0400 Subject: x86: use generic per-device dma coherent allocator Signed-off-by: Dmitry Baryshkov Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 122 +--------------------------------------------- 1 file changed, 2 insertions(+), 120 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dc00a1331ace..b75c81a8d3e6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -197,124 +197,6 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -#ifdef CONFIG_X86_32 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pos, err; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); - - pages >>= PAGE_SHIFT; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} -#else -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -#endif /* CONFIG_X86_32 */ - int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI @@ -383,7 +265,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; if (!dev) { @@ -486,7 +368,7 @@ void dma_free_coherent(struct device *dev, size_t size, { int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) + if (dma_release_from_coherent(dev, order, vaddr)) return; if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); -- cgit v1.2.3 From d94d93ca5cc36cd78c532def62772c98fe8ba5d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:46 -0700 Subject: x64, x2apic/intr-remap: 8259 specific mask/unmask routines 8259 specific mask/unmask routines which be used later while enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49d9204..4b8a53d841f7 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + void init_8259A(int auto_eoi) { unsigned long flags; -- cgit v1.2.3 From 4dc2f96cacd1e74c688f94348a3bfd0a980817d5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:47 -0700 Subject: x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Generic ioapic specific routines which be used later during enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9e645cba11c4..84dd63c13d63 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -114,6 +114,9 @@ DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC RTE contents at the OS boot up */ +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -446,6 +449,69 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} + int skip_ioapic_setup; int ioapic_force; -- cgit v1.2.3 From 0c81c746f9bdbfaafe64322d540c8b7b59c27314 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:48 -0700 Subject: x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Move the read_apic_id() to genapic routines. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 5 +++++ arch/x86/kernel/genapic_64.c | 11 ----------- arch/x86/kernel/genapic_flat_64.c | 14 +++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++++++++- 4 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c14..9dd4ee4735f5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1096,6 +1096,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..7414871751a7 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -79,17 +79,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..400ed8df8b4e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static cpumask_t flat_target_cpus(void) { @@ -95,9 +97,17 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = GET_XAPIC_ID(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -123,6 +133,7 @@ struct genapic apic_flat = { .send_IPI_mask = flat_send_IPI_mask, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; /* @@ -187,4 +198,5 @@ struct genapic apic_physflat = { .send_IPI_mask = physflat_send_IPI_mask, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..1ef99be18488 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -134,9 +135,19 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int uv_read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + + return id; +} + static unsigned int phys_pkg_id(int index_msb) { - return GET_APIC_ID(read_apic_id()) >> index_msb; + return uv_read_apic_id() >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -159,6 +170,7 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .read_apic_id = uv_read_apic_id, }; static __cpuinit void set_x2apic_extra_bits(int pnode) -- cgit v1.2.3 From 2d7a66d02e11af9ab8e16c76d22767e622b4e3d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 14:24:19 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support, fix Yinghai Lu wrote: > Setting APIC routing to physical flat > Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (0 vs 4) > Pid: 1, comm: swapper Not tainted 2.6.26-rc9-tip-01763-g74f94b1-dirty #320 > > Call Trace: > [] ? set_cpu_sibling_map+0x38c/0x3bd > [] ? read_xapic_id+0x25/0x3e > [] ? verify_local_APIC+0x139/0x1b9 > [] ? read_xapic_id+0x25/0x3e > [] ? native_smp_prepare_cpus+0x224/0x2e9 > [] ? kernel_init+0x64/0x341 > [] ? child_rip+0xa/0x11 > [] ? kernel_init+0x0/0x341 > [] ? child_rip+0x0/0x11 > > > guess read_apic_id changing cuase some problem... genapic's read_apic_id() returns the actual apic id extracted from the APIC_ID register. And in some cases like UV, read_apic_id() returns completely different values from APIC ID register. Use the native apic register read, rather than genapic read_apic_id() in verify_local_APIC() And also, lapic_suspend() should also use native apic register read. Reported-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: "jeremy@goop.org" Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9dd4ee4735f5..3963f590c3d4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -626,10 +626,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -1137,7 +1137,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); -- cgit v1.2.3 From 1b374e4d6f8b3eb2fcd034fcc24ea8ba1dfde7aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:49 -0700 Subject: x64, x2apic/intr-remap: basic apic ops support Introduce basic apic operations which handle the apic programming. This will be used later to introduce another specific operations for x2apic. For the perfomance critial accesses like IPI's, EOI etc, we use the native operations as they are already referenced by different indirections like genapic, irq_chip etc. 64bit Paravirt ops can also define their apic operations accordingly. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ arch/x86/kernel/apic_64.c | 34 ++++++++++++++++++++++++++++++++-- arch/x86/kernel/io_apic_64.c | 8 ++++---- arch/x86/kernel/paravirt.c | 8 +++++--- arch/x86/kernel/smpboot.c | 28 +++++++++++----------------- 5 files changed, 58 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3e58b676d23b..2a83c07bd887 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,12 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +void apic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3963f590c3d4..9bb040689b31 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -119,13 +119,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +141,36 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 84dd63c13d63..b62d42ef9283 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1157,6 +1157,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) void __apicdebuginit print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; + unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1200,10 +1201,9 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..b80105a0f474 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,9 +360,11 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, +#ifnded CONFIG_X86_64 + .apic_write = native_apic_mem_write, + .apic_write_atomic = native_apic_mem_write_atomic, + .apic_read = native_apic_mem_read, +#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f35c2d8016ac..c55263b3df02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; -static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 +static int boot_cpu_logical_apicid; + u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_icr_write(APIC_DM_REMRD | regs[i], apicid); timeout = 0; do { @@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, + phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); + apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), + phys_apicid); /* * Give the other CPU some time to accept the IPI. @@ -1147,7 +1139,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ +#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); +#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); -- cgit v1.2.3 From 32e1d0a0651004f5fe47f85a2a5c725ad579a90c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:50 -0700 Subject: x64, x2apic/intr-remap: cpuid bits for x2apic feature cpuid feature for x2apic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..0bf4d37a0483 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ -- cgit v1.2.3 From 13c88fb58d0112d47f7839f24a755715c6218822 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:52 -0700 Subject: x64, x2apic/intr-remap: x2apic ops for x2apic mode support x2apic ops for x2apic mode support. This uses MSR interface and differs slightly from the xapic register layout. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9bb040689b31..a969ef78e12a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -171,6 +171,41 @@ struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .write_atomic = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ -- cgit v1.2.3 From cff73a6ffaed726780b001937d2a42efde553922 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:53 -0700 Subject: x64, x2apic/intr-remap: introcude self IPI to genapic routines Introduce self IPI op for genapic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 7414871751a7..6657de609dcb 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -61,7 +61,7 @@ void __init setup_apic_routing(void) /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 400ed8df8b4e..735586822135 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -131,6 +131,7 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, @@ -196,6 +197,7 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, -- cgit v1.2.3 From 12a67cf6851871ca8df42025c94f140c303d0f7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:54 -0700 Subject: x64, x2apic/intr-remap: x2apic cluster mode support x2apic cluster mode support. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 2 + arch/x86/kernel/genx2apic_cluster.c | 135 ++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 arch/x86/kernel/genx2apic_cluster.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 55ff016e9f69..bde3e9b6fec9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += genx2apic_cluster.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6657de609dcb..1029e178cdf7 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -38,6 +38,8 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; + else if (cpu_has_x2apic && intr_remapping_enabled) + genapic = &apic_x2apic_cluster; else #ifdef CONFIG_ACPI /* diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 000000000000..ed0fdede800a --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +/* + * for now each logical cpu is in its own vector allocation domain. + */ +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +/* + * for now, we send the IPI's one by one in the cpumask. + * TBD: Based on the cpu mask, we can send the IPI's to the cluster group + * at once. We have 16 cpu's in a cluster. This will minimize IPI register + * writes. + */ +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +static void init_x2apic_ldr(void) +{ + int cpu = smp_processor_id(); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); + return; +} + +struct genapic apic_x2apic_cluster = { + .name = "cluster x2apic", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; -- cgit v1.2.3 From 5c520a6724e912a7e6153b7597192edad6752750 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:55 -0700 Subject: x64, x2apic/intr-remap: setup init_apic_ldr for UV Signed-off-by: Suresh Siddha Signed-off-by: Jack Steiner Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 1ef99be18488..dcd4fb219daa 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,6 +120,10 @@ static int uv_apic_id_registered(void) return 1; } +static inline void uv_init_apic_ldr(void) +{ +} + static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -164,6 +168,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, + .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, -- cgit v1.2.3 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 279 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef78e12a..d5c06917b5b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42ef9283..9bd02ef049a0 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } -- cgit v1.2.3 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 222 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef049a0..877aa2e9d7e8 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) -- cgit v1.2.3 From 6e1cb38a2aef7680975e71f23de187859ee8b158 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:58 -0700 Subject: x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping x2apic support. Interrupt-remapping must be enabled before enabling x2apic, this is needed to ensure that IO interrupts continue to work properly after the cpu mode is changed to x2apic(which uses 32bit extended physical/cluster apic id). On systems where apicid's are > 255, BIOS can handover the control to OS in x2apic mode. Or if the OS handover was in legacy xapic mode, check if it is capable of x2apic mode. And if we succeed in enabling Interrupt-remapping, then we can enable x2apic mode in the CPU. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 + arch/x86/kernel/apic_64.c | 154 ++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common_64.c | 2 + arch/x86/kernel/genapic_64.c | 1 + arch/x86/kernel/mpparse.c | 2 + arch/x86/kernel/setup.c | 2 + arch/x86/kernel/smpboot.c | 5 ++ 7 files changed, 164 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 785700a08e9d..8705262ddcda 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d5c06917b5b1..dd0501039f09 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -46,8 +48,12 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; + /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -896,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -943,6 +1068,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -981,6 +1111,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1238,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1381,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 36537ab9e56a..e7bf3c2dc5fe 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,6 +606,8 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1029e178cdf7..792e21ba1a81 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..70e1f3e287fb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 987b6fde3a99..2e78a143dec3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -730,6 +730,8 @@ void __init setup_arch(char **cmdline_p) num_physpages = max_pfn; check_efer(); + if (cpu_has_x2apic) + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c55263b3df02..0c43e1f2e7d3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1145,6 +1145,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); +#ifdef CONFIG_X86_64 + enable_IR_x2apic(); + setup_apic_routing(); +#endif + if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); -- cgit v1.2.3 From 2d9579a124d746a3e0e0ba45e57d80800ee80807 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:59 -0700 Subject: x64, x2apic/intr-remap: support for x2apic physical mode support x2apic Physical mode support. By default we will use x2apic cluster mode. x2apic physical mode can be selected using "x2apic_phys" boot parameter. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 18 +++++- arch/x86/kernel/genx2apic_phys.c | 122 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/genx2apic_phys.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3e9b6fec9..81280e93e792 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 792e21ba1a81..3940d8161f8b 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -30,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; +static int x2apic_phys = 0; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + static enum uv_system_type uv_system_type; /* @@ -39,9 +48,12 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) - genapic = &apic_x2apic_cluster; - else + else if (cpu_has_x2apic && intr_remapping_enabled) { + if (x2apic_phys) + genapic = &apic_x2apic_phys; + else + genapic = &apic_x2apic_cluster; + } else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 000000000000..3c70b9d692b2 --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +void init_x2apic_ldr(void) +{ + return; +} + +struct genapic apic_x2apic_phys = { + .name = "physical x2apic", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; -- cgit v1.2.3 From 372e92d8b3e433888bf76c36f1c7e1405eae1584 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 14:56:18 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support On Thu, Jul 10, 2008 at 12:53:20PM -0700, Ingo Molnar wrote: > > Btw., i threw it at the -tip test-cluster and got back a quick build > bugreport: > > arch/x86/xen/enlighten.c: In function 'xen_patch': > arch/x86/xen/enlighten.c:1084: warning: label 'patch_site' defined but not used > arch/x86/xen/enlighten.c: At top level: > arch/x86/xen/enlighten.c:1272: error: expected identifier before '(' token > arch/x86/xen/enlighten.c:1273: error: expected '}' before '.' token > arch/x86/kernel/paravirt.c:376:2: error: invalid preprocessing directive > #ifndedarch/x86/kernel/paravirt.c:384:2: error: #endif without #if > > with this config: > > http://redhat.com/~mingo/misc/config-Thu_Jul_10_21_43_28_CEST_2008.bad fix the typo. Signed-off-by: Suresh Siddha Cc: "Siddha Cc: Suresh B" Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index b80105a0f474..4f29ff847ebe 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,7 +360,7 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifnded CONFIG_X86_64 +#ifndef CONFIG_X86_64 .apic_write = native_apic_mem_write, .apic_write_atomic = native_apic_mem_write_atomic, .apic_read = native_apic_mem_read, -- cgit v1.2.3 From 277d1f5846d84e16760131a93b7a67ebfa8eded4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:55 -0700 Subject: x2apic: uninline uv_init_apic_ldr() Andrew says: > There's no point in declaring it inline if it's always called indirectly. And point taken! Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dcd4fb219daa..c915f750241e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,7 +120,7 @@ static int uv_apic_id_registered(void) return 1; } -static inline void uv_init_apic_ldr(void) +static void uv_init_apic_ldr(void) { } -- cgit v1.2.3 From c535b6a1a685eb23f96e2c221777d6c1e05080d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:41:54 -0700 Subject: x86: let 32bit use apic_ops too Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 2a83c07bd887..7413354c9ffd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,19 +145,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_icr_write(u32 low, u32 id) -{ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); -} - -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ -- cgit v1.2.3 From 4c9961d56ec20c27ec5d02e49fd7427748312741 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:44:16 -0700 Subject: x86: make read_apic_id return final apicid also remove GET_APIC_ID when read_apic_id is used. need to apply after [PATCH] x86: mach_apicdef.h need to include before smp.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 6 +++--- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/io_apic_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 6 +++--- 7 files changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8705262ddcda..b314bcd08406 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7413354c9ffd..47ff978aecfd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1230,7 +1230,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1270,7 +1270,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dd0501039f09..c75f58a66d8e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1060,7 +1060,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -1069,7 +1069,7 @@ void __init early_init_lapic_mapping(void) void __init init_apic_mappings(void) { if (x2apic) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return; } @@ -1092,7 +1092,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 735586822135..7dac2f275fad 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -101,7 +101,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_XAPIC_ID(apic_read(APIC_ID)); + id = GET_APIC_ID(apic_read(APIC_ID)); return id; } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c50adb84ea6f..382208d11f8d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1501,7 +1501,7 @@ void /*__init*/ print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(read_apic_id())); + GET_APIC_ID(v)); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1709,8 +1709,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(read_apic_id()); + entry.dest.physical.physical_dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 877aa2e9d7e8..2db0f98e2af5 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1246,7 +1246,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1439,7 +1439,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(read_apic_id()); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0c43e1f2e7d3..6cd002f3e20e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -211,7 +211,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(read_apic_id()); + phys_id = read_apic_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -1157,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { + if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); + read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); -- cgit v1.2.3 From f910a9dc7c865896815e2a95fe33363e9522f277 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 01:01:20 -0700 Subject: x86: make 64bit have get_apic_id generalize the x2apic code some more. let read_apic_id become a macro (later on a function/inline) GET_APIC_ID(apic_read(APIC_ID)) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) instead of this weird construct: -#define read_apic_id (genapic->read_apic_id) Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 26 +++++++++++++++++++++++--- arch/x86/kernel/genx2apic_cluster.c | 20 +++++++++++++++++++- arch/x86/kernel/genx2apic_phys.c | 20 +++++++++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 23 ++++++++++++++++++++--- 4 files changed, 81 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7dac2f275fad..2c973cbf054f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -97,11 +97,27 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_APIC_ID(apic_read(APIC_ID)); + id = get_apic_id(apic_read(APIC_ID)); return id; } @@ -134,7 +150,9 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -200,5 +218,7 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ed0fdede800a..40bc0140d89f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -94,6 +94,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -131,5 +147,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3c70b9d692b2..2f3c6ca19de9 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -84,6 +84,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -118,5 +134,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c915f750241e..3ca29cd8c23c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -139,16 +139,31 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } -static unsigned int uv_read_apic_id(void) +static unsigned int get_apic_id(unsigned long x) { unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + id = x | __get_cpu_var(x2apic_extra_bits); return id; } +static long set_apic_id(unsigned int id) +{ + unsigned long x; + + /* maskout x2apic_extra_bits ? */ + x = id; + return x; +} + +static unsigned int uv_read_apic_id(void) +{ + + return get_apic_id(apic_read(APIC_ID)); +} + static unsigned int phys_pkg_id(int index_msb) { return uv_read_apic_id() >> index_msb; @@ -175,7 +190,9 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ - .read_apic_id = uv_read_apic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) -- cgit v1.2.3 From 94a8c3c2437c8946f1b6c8e0b2c560a7db8ed3c6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Jul 2008 22:19:35 -0700 Subject: x86: let 32bit use apic_ops too - fix fix for pv - clean up the namespace there too. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 5 ----- arch/x86/kernel/vmi_32.c | 51 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4f29ff847ebe..e0f139106c7e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,11 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = native_apic_mem_write, - .apic_write_atomic = native_apic_mem_write_atomic, - .apic_read = native_apic_mem_read, -#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..cf3074354553 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +static u32 vmi_apic_read(u32 reg) +{ + return 0; +} + +static void vmi_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u64 vmi_apic_icr_read(void) +{ + return 0; +} + +static void vmi_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void vmi_apic_wait_icr_idle(void) +{ + return; +} + +static u32 vmi_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops vmi_basic_apic_ops = { + .read = vmi_apic_read, + .write = vmi_apic_write, + .write_atomic = vmi_apic_write, + .icr_read = vmi_apic_icr_read, + .icr_write = vmi_apic_icr_write, + .wait_icr_idle = vmi_apic_wait_icr_idle, + .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, +}; +#endif + /* * VMI setup common to all processors */ @@ -904,9 +948,10 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(pv_apic_ops.apic_read, APICRead); - para_fill(pv_apic_ops.apic_write, APICWrite); - para_fill(pv_apic_ops.apic_write_atomic, APICWrite); + para_fill(vmi_basic_apic_ops.read, APICRead); + para_fill(vmi_basic_apic_ops.write, APICWrite); + para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); + apic_ops = &vmi_basic_apic_ops; #endif /* -- cgit v1.2.3 From 9a8f0e6b5dfe3b4f330fc82b16a4000f5688fce8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 09:59:40 -0700 Subject: x86: let 32bit use apic_ops too - fix Fix VMI apic_ops. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 +++++ arch/x86/kernel/vmi_32.c | 51 +++-------------------------------------------- 2 files changed, 8 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 47ff978aecfd..cb54d9e20f94 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index cf3074354553..237082833c14 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,50 +676,6 @@ static inline int __init probe_vmi_rom(void) return 0; } -#ifdef CONFIG_X86_LOCAL_APIC -static u32 vmi_apic_read(u32 reg) -{ - return 0; -} - -static void vmi_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 vmi_apic_icr_read(void) -{ - return 0; -} - -static void vmi_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void vmi_apic_wait_icr_idle(void) -{ - return; -} - -static u32 vmi_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops vmi_basic_apic_ops = { - .read = vmi_apic_read, - .write = vmi_apic_write, - .write_atomic = vmi_apic_write, - .icr_read = vmi_apic_icr_read, - .icr_write = vmi_apic_icr_write, - .wait_icr_idle = vmi_apic_wait_icr_idle, - .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, -}; -#endif - /* * VMI setup common to all processors */ @@ -948,10 +904,9 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(vmi_basic_apic_ops.read, APICRead); - para_fill(vmi_basic_apic_ops.write, APICWrite); - para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); - apic_ops = &vmi_basic_apic_ops; + para_fill(apic_ops->read, APICRead); + para_fill(apic_ops->write, APICWrite); + para_fill(apic_ops->write_atomic, APICWrite); #endif /* -- cgit v1.2.3 From 91467bdf6e53058af13fd255375d6634ba0c70e0 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Fri, 18 Jul 2008 19:07:53 +0200 Subject: x86: move dma32_reserve_bootmem() after reserve_crashkernel() On a x86-64 machine (nothing special I could encounter) I had the problem that crashkernel reservation with the usual "64M@16M" failed. While debugging that, I encountered that dma32_reserve_bootmem() reserves a memory region which is in that area. Because dma32_reserve_bootmem() does not rely on a specific offset but crashkernel does, it makes sense to move the dma32_reserve_bootmem() reservation down a bit. I tested that patch and it works without problems. I don't see any negative effects of that move, but maybe I oversaw something ... While we strictly don't need that patch in 2.6.27 because we have the automatic, dynamic offset detection, it makes sense to also include it here because: - it's easier to get it in -stable then, - many people are still used to the 'crashkernel=...@16M' syntax, - not everybody may be using a reloatable kernel. Signed-off-by: Bernhard Walle Cc: kexec@lists.infradead.org Cc: vgoyal@redhat.com Cc: akpm@linux-foundation.org Cc: Bernhard Walle Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81a..74d110ef2690 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -792,10 +792,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn); -#ifdef CONFIG_X86_64 - dma32_reserve_bootmem(); -#endif - #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -810,6 +806,15 @@ void __init setup_arch(char **cmdline_p) #endif reserve_crashkernel(); +#ifdef CONFIG_X86_64 + /* + * dma32_reserve_bootmem() allocates bootmem which may conflict + * with the crashkernel command line, so do that after + * reserve_crashkernel() + */ + dma32_reserve_bootmem(); +#endif + reserve_ibft_region(); #ifdef CONFIG_KVM_CLOCK -- cgit v1.2.3 From f586bf7df9acc26b68b0feefc0dd32fa63516d3a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 15:58:35 -0700 Subject: x86: APIC: Remove apic_write_around(); use alternatives, merge fix On Fri, Jul 18, 2008 at 02:03:48PM -0700, Ingo Molnar wrote: > > * Yinghai Lu wrote: > > > > git merge tip/x86/x2apic > > CONFLICT (content): Merge conflict in arch/x86/kernel/Makefile > > CONFLICT (content): Merge conflict in arch/x86/kernel/paravirt.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/smpboot.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/vmi_32.c > > CONFLICT (content): Merge conflict in arch/x86/xen/enlighten.c > > CONFLICT (content): Merge conflict in include/asm-x86/apic.h > > CONFLICT (content): Merge conflict in include/asm-x86/paravirt.h > > that's due to the changes in tip/x86/apic and in tip/x86/uv. > > ok, i've just merged x86/apic into x86/x2apic and x86/uv as well, and > pushed out the result. > > Note: it's a first raw merge and completely untested. It will now merge > cleanly into tip/master. There are probably a few details missing. Ingo, thanks for doing this. While I was testing my merge changes, you posted yours... anyhow we need this piece, which is missing from your merge. Signed-off-by: Suresh Siddha Cc: Yinghai Lu Cc: "Maciej W. Rozycki" Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 ++--- arch/x86/kernel/apic_64.c | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index dcb897f22aa2..8728f54a93d8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -174,8 +174,8 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); } u64 xapic_icr_read(void) @@ -191,7 +191,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46e612408aca..a850bc63fb1c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -167,7 +167,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, @@ -206,7 +205,6 @@ u64 x2apic_icr_read(void) static struct apic_ops x2apic_ops = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .write_atomic = native_apic_msr_write, .icr_read = x2apic_icr_read, .icr_write = x2apic_icr_write, .wait_icr_idle = x2apic_wait_icr_idle, -- cgit v1.2.3 From 1b9b89e7f163336ad84200b66a17284dbf26aced Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 22:08:21 -0700 Subject: x86: add apic probe for genapic 64bit, v2 introducing an APIC handling probing abstraction: static struct genapic *apic_probe[] __initdata = { &apic_x2apic_uv_x, &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, NULL, }; This way we can remove UV, x2apic specific code from genapic_64.c and move them to their specific genapic files. [ v2: fix compiling when CONFIG_ACPI is not set ] Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 85 ++++++++++++------------------------- arch/x86/kernel/genapic_flat_64.c | 26 ++++++++++++ arch/x86/kernel/genx2apic_cluster.c | 11 +++++ arch/x86/kernel/genx2apic_phys.c | 21 +++++++++ arch/x86/kernel/genx2apic_uv_x.c | 32 +++++++++++++- 5 files changed, 115 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 3940d8161f8b..b3ba969c50d2 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,62 +16,37 @@ #include #include #include -#include #include #include #include -#ifdef CONFIG_ACPI -#include -#endif - -DEFINE_PER_CPU(int, x2apic_extra_bits); +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2xpic_uv_x; +extern struct genapic apic_x2apic_phys; +extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; -static int x2apic_phys = 0; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static enum uv_system_type uv_system_type; +static struct genapic *apic_probe[] __initdata = { + &apic_x2apic_uv_x, + &apic_x2apic_phys, + &apic_x2apic_cluster, + &apic_physflat, + NULL, +}; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (uv_system_type == UV_NON_UNIQUE_APIC) - genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) { - if (x2apic_phys) - genapic = &apic_x2apic_phys; - else - genapic = &apic_x2apic_cluster; - } else -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; - else -#endif - - if (max_physical_apicid < 8) - genapic = &apic_flat; - else - genapic = &apic_physflat; - - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + if (genapic == &apic_flat) { + if (max_physical_apicid >= 8) + genapic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } } /* Same for both flat and physical. */ @@ -83,23 +58,15 @@ void apic_send_IPI_self(int vector) int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) - uv_system_type = UV_NON_UNIQUE_APIC; + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + genapic = apic_probe[i]; + printk(KERN_INFO "Setting APIC routing to %s.\n", + genapic->name); + return 1; + } } return 0; } - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 2c973cbf054f..1740b83329f6 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -21,6 +21,15 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif + +static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 1; +} + static cpumask_t flat_target_cpus(void) { return cpu_online_map; @@ -138,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", + .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -160,6 +170,21 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ +static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + return 1; +#endif + + return 0; +} static cpumask_t physflat_target_cpus(void) { @@ -206,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 40bc0140d89f..ef3f3182d50a 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -4,12 +4,22 @@ #include #include #include +#include + #include #include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled) + return 1; + + return 0; +} + /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ static cpumask_t x2apic_target_cpus(void) @@ -135,6 +145,7 @@ static void init_x2apic_ldr(void) struct genapic apic_x2apic_cluster = { .name = "cluster x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2f3c6ca19de9..f35a3bf3a9e5 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -4,10 +4,30 @@ #include #include #include +#include + #include #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + +static int x2apic_phys; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + return 1; + + return 0; +} /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ @@ -122,6 +142,7 @@ void init_x2apic_ldr(void) struct genapic apic_x2apic_phys = { .name = "physical x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a8e5cb4c4d43..a882bc3a2ce9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -27,6 +27,33 @@ #include #include +static enum uv_system_type uv_system_type; + +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) { + uv_system_type = UV_NON_UNIQUE_APIC; + return 1; + } + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} + DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -153,7 +180,7 @@ static unsigned int get_apic_id(unsigned long x) return id; } -static long set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { unsigned long x; @@ -182,6 +209,7 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", + .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, @@ -433,3 +461,5 @@ void __cpuinit uv_cpu_init(void) if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } + + -- cgit v1.2.3 From 026e2c05ef58ef413e2d52696f125d5ea1aa8bce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jul 2008 11:58:14 +0200 Subject: x86, cyrix: debug Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..db5868cd2443 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -116,7 +116,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -127,11 +127,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); } static void __cpuinit set_cx86_inc(void) @@ -144,10 +144,10 @@ static void __cpuinit set_cx86_inc(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + setCx86_old(CX86_PCR1, getCx86_old(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } @@ -162,14 +162,14 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -286,7 +286,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -309,7 +309,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -424,7 +424,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } -- cgit v1.2.3 From bbc1f698a508927d21324b57500e863f9bd562b9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:34:13 +0530 Subject: x86: Introducing asm/syscalls.h Declaring arch-dependent syscalls for x86 architecture Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ioport.c | 1 + arch/x86/kernel/ldt.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/signal_32.c | 1 + arch/x86/kernel/signal_64.c | 1 + arch/x86/kernel/sys_i386_32.c | 2 ++ arch/x86/kernel/sys_x86_64.c | 1 + arch/x86/kernel/tls.c | 1 + arch/x86/kernel/vm86_32.c | 1 + 10 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a31c85..191914302744 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858a..c49ff2dc8f83 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..6490e4981261 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e8a8e1b99817..c78090dd0c52 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 07faaa5109cb..5cede1045ce7 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf87684474f1..b95a0a609053 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb855a60..1884a8d12bfa 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,6 +22,8 @@ #include #include +#include + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef33817..c9288c883e20 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -16,6 +16,7 @@ #include #include +#include asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf375a307..6bb7b8579e70 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "tls.h" diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566fa27d2..4eeb5cf9720d 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -46,6 +46,7 @@ #include #include #include +#include /* * Known problems: -- cgit v1.2.3 From fb26132b441e75d6ba9996efc29b42081aee0abd Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:36:40 +0530 Subject: x86: process_32.c declare cpu_number before they get used Moved DECLARE_PER_CPU(int, cpu_number) from CONFIG_X86_32_SMP to CONFIG_X86_32 because cpu_number is required for both. And include asm/smp.h in process_32.c Signed-off-by: Jaswinder Singh --- arch/x86/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6490e4981261..7218bccd1766 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -56,6 +56,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -- cgit v1.2.3 From cc0384917bf69079088701a0725c5fc6b554bf35 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:52:51 +0530 Subject: x86: time_XX.c declare functions before they get used Declare time_init() in asm-x86/time.h Also did cleanup in asm-x86/timer.h : timer_ack is only required for X86_32 int recalibrate_cpu_khz(void) is for X86_32 Signed-off-by: Jaswinder Singh --- arch/x86/kernel/time_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ffe3c664afc0..bbecf8b6bf96 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "do_timer.h" -- cgit v1.2.3 From 1e84911c6c37fd1080ef07039e19c346628b31db Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:58:29 +0530 Subject: x86: mtrr/main.c declare range_state as static Signed-off-by: Jaswinder Singh --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8faf..b5ade28ca8f8 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -729,7 +729,7 @@ struct var_mtrr_range_state { mtrr_type type; }; -struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; static int __init -- cgit v1.2.3 From a31863168660c6b6f6c7ffe05bb6a38e97803326 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 22 Jul 2008 21:53:53 +0200 Subject: x86: consolidate header guards This patch consolidates the header guard names which are also used externally, i.e. in .c files. Signed-off-by: Vegard Nossum --- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/syscall_64.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index aa89387006fe..505543a75a56 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c17487..3d1be4f0fac5 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); -- cgit v1.2.3 From 05d3ed0a1fe3ea05ab9f3b8d32576a0bc2e19660 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 21 Jul 2008 10:15:22 -0400 Subject: x86, pci: iommu fix potential overflow in alloc_iommu() It is possible that alloc_iommu()'s boundary_size overflows as dma_get_seg_boundary can return 0xffffffff. In that case, further usage of boundary_size triggers a BUG_ON() in the iommu code. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d2..1062dc1e6396 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -93,7 +93,7 @@ static unsigned long alloc_iommu(struct device *dev, int size) base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); -- cgit v1.2.3 From fd60b39f845aa7462453af8aed4f059b162f181f Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 23 Jul 2008 22:45:01 +0800 Subject: arch/x86/kernel/genx2apic_uv_x.c: Removed duplicated include Removed duplicated include file in arch/x86/kernel/genx2apic_uv_x.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a882bc3a2ce9..14a9772778e5 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 38ffbe66d59051fd9cfcfc8545f164700e2fa3bc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 14:21:18 -0700 Subject: x86/paravirt/xen: properly fill out the ldt ops LTP testing showed that Xen does not properly implement sys_modify_ldt(). This patch does the final little bits needed to make the ldt work properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 9 ++++++++- arch/x86/kernel/paravirt.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3f..4895e0634d22 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, (mincount - oldsize) * LDT_ENTRY_SIZE); + paravirt_alloc_ldt(newldt, mincount); + #ifdef CONFIG_X86_64 /* CHECKME: Do we really need this ? */ wmb(); @@ -75,6 +77,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { + paravirt_free_ldt(oldldt, oldsize); if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else @@ -86,10 +89,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + int i; if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + + for(i = 0; i < old->size; i++) + write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } @@ -126,6 +132,7 @@ void destroy_context(struct mm_struct *mm) if (mm == current->active_mm) clear_LDT(); #endif + paravirt_free_ldt(mm->context.ldt, mm->context.size); if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d798..d8f2277be5a0 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -348,6 +348,10 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = native_write_ldt_entry, .write_gdt_entry = native_write_gdt_entry, .write_idt_entry = native_write_idt_entry, + + .alloc_ldt = paravirt_nop, + .free_ldt = paravirt_nop, + .load_sp0 = native_load_sp0, #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) -- cgit v1.2.3 From d5de8841355a48f7f634a04507185eaf1f9755e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 13:28:58 -0700 Subject: x86: split spinlock implementations out into their own files ftrace requires certain low-level code, like spinlocks and timestamps, to be compiled without -pg in order to avoid infinite recursion. This patch splits out the core paravirt spinlocks and the Xen spinlocks into separate files which can be compiled without -pg. Also do xen/time.c while we're about it. As a result, we can now use ftrace within a Xen domain. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/paravirt-spinlocks.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 23 ----------------------- 3 files changed, 33 insertions(+), 25 deletions(-) create mode 100644 arch/x86/kernel/paravirt-spinlocks.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec5..d679cb2c79b4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg -CFLAGS_REMOVE_paravirt.o = -pg +CFLAGS_REMOVE_paravirt-spinlocks.o = -pg endif # @@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..38d7f7f1dbc9 --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -0,0 +1,31 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include +#include + +#include + +struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +#endif +}; +EXPORT_SYMBOL_GPL(pv_lock_ops); + +void __init paravirt_use_bytelocks(void) +{ +#ifdef CONFIG_SMP + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d798..0d71de9ff56d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -461,18 +450,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, -#endif -}; -EXPORT_SYMBOL_GPL(pv_lock_ops); - EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); -- cgit v1.2.3 From e0a5a5d9b006fd441e61685a051fa85d52fb172c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 22 Jul 2008 18:14:16 +0200 Subject: x86, 64-bit, dwarf2: push pushes 8 bytes and popf pops 8 The CFI_ADJUST_CFA_OFFSET dwarf2 annotation of a push/popf pair in ret_from_fork wrongly used a value of 4. It should have been 8. Fix that. Signed-off-by: Alexander van Heukelum Cc: Andi Kleen Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d439605..cf3a0b2d0059 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) -- cgit v1.2.3 From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 29 +++++++++++++++++++---------- arch/x86/kernel/io_apic_64.c | 31 +++++++++++++++++++------------ 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c5..d54455ec9850 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -50,6 +50,8 @@ #include #include +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1345,7 +1347,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1463,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,7 +1484,7 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1567,12 +1568,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1605,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -2333,8 +2344,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d1707..8cdcc4f287cc 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -53,6 +53,8 @@ #include #include +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +89,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -965,7 +965,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1060,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,7 +1081,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1159,12 +1158,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1195,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1849,8 +1858,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { -- cgit v1.2.3 From 510b37258dfd61693ca6c039865c78bd996e3718 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Jul 2008 13:20:22 -0700 Subject: x86: move declaring x2apic_extra_bits it is for uv only Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 2 -- arch/x86/kernel/genx2apic_uv_x.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f35a3bf3a9e5..3229c68aedd4 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -10,8 +10,6 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); - static int x2apic_phys; static int set_x2apic_phys_mode(char *arg) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 14a9772778e5..169388c4cce9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -26,6 +26,8 @@ #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + static enum uv_system_type uv_system_type; static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -- cgit v1.2.3 From 36a028de785c6e6f15ac84f8b3b087b89137ea26 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:28 +0200 Subject: x86: apic unification - merge down lapic_get_maxlvt No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c898358371..447dd8c5c0e9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -193,9 +193,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7ee..4fa2a8620c26 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -158,11 +158,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* -- cgit v1.2.3 From d4c63ec060f3315653c0ae5bc3a7fe2419a2282f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:29 +0200 Subject: x86: apic unification - merge down enable_NMI_through_LVT0 No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 447dd8c5c0e9..01708f128eee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -172,11 +172,15 @@ u32 safe_apic_wait_icr_idle(void) */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; - /* Level triggered for 82489DX */ + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fa2a8620c26..7615b4b9c3f3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,6 +150,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } -- cgit v1.2.3 From 4fe702c7e401f912c0edd294af6e37c02f451bbb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:19:27 +0530 Subject: X86_32: declare pt_regs_access as unsigned long Fixed pt_regs_access to unsigned long as per X86_64 Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccce85db..fc3e8dcd9da6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -69,7 +69,7 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); regno >>= 2; -- cgit v1.2.3 From f86c99853b22576ee8dc4fa27ff6f3c0c7ce0ef8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:52:53 +0530 Subject: X86_SMP: smpboot.c declare idle_thread_array and smp_b_stepping as static Signed-off-by: Jaswinder Singh --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4b53a647bc0a..a9331a42f76f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) #else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif @@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid; static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ -int __cpuinitdata smp_b_stepping; +static int __cpuinitdata smp_b_stepping; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) -- cgit v1.2.3 From 2907829cd0ffdef69083985ba28cf1cf3857c681 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 11:05:56 +0530 Subject: X86_SMP: ipi.c declare functions before they get used move upwards for __send_IPI_shortcut and send_IPI_mask_bitmask Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ipi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 3f7537b669d3..f1c688e46f35 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,6 +20,8 @@ #ifdef CONFIG_X86_32 #include +#include + /* * the following functions deal with sending IPIs between CPUs. * @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) } /* must come after the send_IPI functions above for inlining */ -#include static int convert_apicid_to_cpu(int apic_id) { int i; -- cgit v1.2.3 From 3c9339049df5cc3a468c11de6c4101a1ea8c3d83 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Jul 2008 11:32:36 +0200 Subject: x86: fix ds.c build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/ds.c: In function ‘ds_allocate_buffer': arch/x86/kernel/ds.c:339: error: implicit declaration of function ‘PAGE_ALIGN' Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 24a323c95997..ab21c270bfa4 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -29,6 +29,7 @@ #include #include #include +#include /* -- cgit v1.2.3 From 1ddb5518052e4e28ab489237443f7443b3fd69ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:55 +0200 Subject: x86: convert pci-dma.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551bb..88ddd04cfa98 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void) * using 512M as goal */ align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); + size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); if (dma32_bootmem_ptr) -- cgit v1.2.3 From a8132e5fe2c4f3f780b8bd3cce7851640f79f1c7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 14:57:59 +0200 Subject: x86, AMD IOMMU: replace to_pages macro with iommu_num_pages This patch removes the to_pages macro from AMD IOMMU code and calls the generic iommu_num_pages function instead. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c25210e6ac88..79eff735fde6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -29,9 +29,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EXIT_LOOP_COUNT 10000000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = to_pages(address, size); + unsigned pages = iommu_num_pages(address, size); address &= PAGE_MASK; @@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = to_pages(iommu->exclusion_start, - iommu->exclusion_length); + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned int pages; int i; - pages = to_pages(paddr, size); + pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; address = dma_ops_alloc_addresses(dev, dma_dom, pages); @@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = to_pages(dma_addr, size); + pages = iommu_num_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; -- cgit v1.2.3 From 87e39ea5714dd59ba31e36c25833d2b20255a29d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 14:58:00 +0200 Subject: x86 gart: replace to_pages macro with iommu_num_pages This patch removes the to_pages macro from x86 GART code and calls the generic iommu_num_pages function instead. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d2..c98240431561 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP @@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) { - unsigned long npages = to_pages(phys_mem, size); + unsigned long npages = iommu_num_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages); int i; @@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = to_pages(dma_addr, size); + npages = iommu_num_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = to_pages(s->offset, s->length); + pages = iommu_num_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += to_pages(s->offset, s->length); + pages += iommu_num_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) -- cgit v1.2.3 From 3a61ec387c9092dfc91a5959145d36835a72fc4c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 13:07:50 +0200 Subject: x86, AMD IOMMU: include amd_iommu_last_bdf in device initialization All the values read while searching for amd_iommu_last_bdf are defined as inclusive. Let the code handle this value as such. Found by Wei Wang. Thanks Wei. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Cc: Wei Wang Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/amd_iommu_init.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c25210e6ac88..74697408576f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -667,7 +667,7 @@ static int get_device_resources(struct device *dev, _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ - if (_bdf >= amd_iommu_last_bdf) { + if (_bdf > amd_iommu_last_bdf) { *iommu = NULL; *domain = NULL; *bdf = 0xffff; @@ -1085,7 +1085,7 @@ void prealloc_protection_domains(void) while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { devid = (dev->bus->number << 8) | dev->devfn; - if (devid >= amd_iommu_last_bdf) + if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; if (domain_for_device(devid)) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c9d8ff2eb130..d9a9da597e79 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) set_device_exclusion_range(m->devid, m); break; case ACPI_IVMD_TYPE_ALL: - for (i = 0; i < amd_iommu_last_bdf; ++i) + for (i = 0; i <= amd_iommu_last_bdf; ++i) set_device_exclusion_range(i, m); break; case ACPI_IVMD_TYPE_RANGE: @@ -934,7 +934,7 @@ int __init amd_iommu_init(void) /* * let all alias entries point to itself */ - for (i = 0; i < amd_iommu_last_bdf; ++i) + for (i = 0; i <= amd_iommu_last_bdf; ++i) amd_iommu_alias_table[i] = i; /* -- cgit v1.2.3 From 5c05917e7fe313a187ad6ebb94c1c6cf42862a0b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jul 2008 17:29:40 -0700 Subject: x86: usb debug port early console, v4 based on work from Eric, and add some timeout so don't dead loop when debug device is not installed v2: fix checkpatch warning v3: move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h also add CONFIG_EARLY_PRINTK_DBGP to disable it by default v4: address comments from Ingo, seperate ehci reg def moving to another patch also add auto detect port that connect to debug device for Nvidia southbridge Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Andi Kleen Cc: "Arjan van de Ven" Cc: "Eric W. Biederman" Cc: "Greg KH" Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 762 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 759 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index ff9e7350da54..28155acf706e 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -3,11 +3,19 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include /* Simple VGA output */ #define VGABASE (__ISA_IO_base + 0xb8000) @@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ static int early_serial_putc(unsigned char ch) { unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); outb(ch, early_serial_base + TXR); @@ -151,6 +160,721 @@ static struct console early_serial_console = { .index = -1, }; +#ifdef CONFIG_EARLY_PRINTK_DBGP + +static struct ehci_caps __iomem *ehci_caps; +static struct ehci_regs __iomem *ehci_regs; +static struct ehci_dbg_port __iomem *ehci_debug; +static unsigned int dbgp_endpoint_out; + +struct ehci_dev { + u32 bus; + u32 slot; + u32 func; +}; + +static struct ehci_dev ehci_dev; + +#define USB_DEBUG_DEVNUM 127 + +#define DBGP_DATA_TOGGLE 0x8800 + +static inline u32 dbgp_pid_update(u32 x, u32 tok) +{ + return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); +} + +static inline u32 dbgp_len_update(u32 x, u32 len) +{ + return (x & ~0x0f) | (len & 0x0f); +} + +/* + * USB Packet IDs (PIDs) + */ + +/* token */ +#define USB_PID_OUT 0xe1 +#define USB_PID_IN 0x69 +#define USB_PID_SOF 0xa5 +#define USB_PID_SETUP 0x2d +/* handshake */ +#define USB_PID_ACK 0xd2 +#define USB_PID_NAK 0x5a +#define USB_PID_STALL 0x1e +#define USB_PID_NYET 0x96 +/* data */ +#define USB_PID_DATA0 0xc3 +#define USB_PID_DATA1 0x4b +#define USB_PID_DATA2 0x87 +#define USB_PID_MDATA 0x0f +/* Special */ +#define USB_PID_PREAMBLE 0x3c +#define USB_PID_ERR 0x3c +#define USB_PID_SPLIT 0x78 +#define USB_PID_PING 0xb4 +#define USB_PID_UNDEF_0 0xf0 + +#define USB_PID_DATA_TOGGLE 0x88 +#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) + +#define PCI_CAP_ID_EHCI_DEBUG 0xa + +#define HUB_ROOT_RESET_TIME 50 /* times are in msec */ +#define HUB_SHORT_RESET_TIME 10 +#define HUB_LONG_RESET_TIME 200 +#define HUB_RESET_TIMEOUT 500 + +#define DBGP_MAX_PACKET 8 + +static int dbgp_wait_until_complete(void) +{ + u32 ctrl; + int loop = 0x100000; + + do { + ctrl = readl(&ehci_debug->control); + /* Stop when the transaction is finished */ + if (ctrl & DBGP_DONE) + break; + } while (--loop > 0); + + if (!loop) + return -1; + + /* + * Now that we have observed the completed transaction, + * clear the done bit. + */ + writel(ctrl | DBGP_DONE, &ehci_debug->control); + return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); +} + +static void dbgp_mdelay(int ms) +{ + int i; + + while (ms--) { + for (i = 0; i < 1000; i++) + outb(0x1, 0x80); + } +} + +static void dbgp_breath(void) +{ + /* Sleep to give the debug port a chance to breathe */ +} + +static int dbgp_wait_until_done(unsigned ctrl) +{ + u32 pids, lpid; + int ret; + int loop = 3; + +retry: + writel(ctrl | DBGP_GO, &ehci_debug->control); + ret = dbgp_wait_until_complete(); + pids = readl(&ehci_debug->pids); + lpid = DBGP_PID_GET(pids); + + if (ret < 0) + return ret; + + /* + * If the port is getting full or it has dropped data + * start pacing ourselves, not necessary but it's friendly. + */ + if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) + dbgp_breath(); + + /* If I get a NACK reissue the transmission */ + if (lpid == USB_PID_NAK) { + if (--loop > 0) + goto retry; + } + + return ret; +} + +static void dbgp_set_data(const void *buf, int size) +{ + const unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = hi = 0; + for (i = 0; i < 4 && i < size; i++) + lo |= bytes[i] << (8*i); + for (; i < 8 && i < size; i++) + hi |= bytes[i] << (8*(i - 4)); + writel(lo, &ehci_debug->data03); + writel(hi, &ehci_debug->data47); +} + +static void dbgp_get_data(void *buf, int size) +{ + unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = readl(&ehci_debug->data03); + hi = readl(&ehci_debug->data47); + for (i = 0; i < 4 && i < size; i++) + bytes[i] = (lo >> (8*i)) & 0xff; + for (; i < 8 && i < size; i++) + bytes[i] = (hi >> (8*(i - 4))) & 0xff; +} + +static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, + const char *bytes, int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_OUT); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + dbgp_set_data(bytes, size); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + return ret; +} + +static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, + int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_IN); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl &= ~DBGP_OUT; + ctrl |= DBGP_GO; + + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + if (size > ret) + size = ret; + dbgp_get_data(data, size); + return ret; +} + +static int dbgp_control_msg(unsigned devnum, int requesttype, int request, + int value, int index, void *data, int size) +{ + u32 pids, addr, ctrl; + struct usb_ctrlrequest req; + int read; + int ret; + + read = (requesttype & USB_DIR_IN) != 0; + if (size > (read ? DBGP_MAX_PACKET:0)) + return -1; + + /* Compute the control message */ + req.bRequestType = requesttype; + req.bRequest = request; + req.wValue = value; + req.wIndex = index; + req.wLength = size; + + pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); + addr = DBGP_EPADDR(devnum, 0); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, sizeof(req)); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + /* Send the setup message */ + dbgp_set_data(&req, sizeof(req)); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + /* Read the result */ + return dbgp_bulk_read(devnum, 0, data, size); +} + + +/* Find a PCI capability */ +static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) +{ + u8 pos; + int bytes; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) + return 0; + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + u8 id; + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } + return 0; +} + +static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) +{ + u32 class; + + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); + if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) + return 0; + + return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); +} + +static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) +{ + u32 bus, slot, func; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + unsigned cap; + + cap = __find_dbgp(bus, slot, func); + + if (!cap) + continue; + if (ehci_num-- != 0) + continue; + *rbus = bus; + *rslot = slot; + *rfunc = func; + return cap; + } + } + } + return 0; +} + +static int ehci_reset_port(int port) +{ + u32 portsc; + u32 delay_time, delay; + int loop; + + /* Reset the usb debug port */ + portsc = readl(&ehci_regs->port_status[port - 1]); + portsc &= ~PORT_PE; + portsc |= PORT_RESET; + writel(portsc, &ehci_regs->port_status[port - 1]); + + delay = HUB_ROOT_RESET_TIME; + for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; + delay_time += delay) { + dbgp_mdelay(delay); + + portsc = readl(&ehci_regs->port_status[port - 1]); + if (portsc & PORT_RESET) { + /* force reset to complete */ + loop = 2; + writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), + &ehci_regs->port_status[port - 1]); + do { + portsc = readl(&ehci_regs->port_status[port-1]); + } while ((portsc & PORT_RESET) && (--loop > 0)); + } + + /* Device went away? */ + if (!(portsc & PORT_CONNECT)) + return -ENOTCONN; + + /* bomb out completely if something weird happend */ + if ((portsc & PORT_CSC)) + return -EINVAL; + + /* If we've finished resetting, then break out of the loop */ + if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) + return 0; + } + return -EBUSY; +} + +static int ehci_wait_for_port(int port) +{ + u32 status; + int ret, reps; + + for (reps = 0; reps < 3; reps++) { + dbgp_mdelay(100); + status = readl(&ehci_regs->status); + if (status & STS_PCD) { + ret = ehci_reset_port(port); + if (ret == 0) + return 0; + } + } + return -ENOTCONN; +} + +#ifdef DBGP_DEBUG +# define dbgp_printk early_printk +#else +static inline void dbgp_printk(const char *fmt, ...) { } +#endif + +typedef void (*set_debug_port_t)(int port); + +static void default_set_debug_port(int port) +{ +} + +static set_debug_port_t set_debug_port = default_set_debug_port; + +static void nvidia_set_debug_port(int port) +{ + u32 dword; + dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x74); + dword &= ~(0x0f<<12); + dword |= ((port & 0x0f)<<12); + write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, + dword); + dbgp_printk("set debug port to %d\n", port); +} + +static void __init detect_set_debug_port(void) +{ + u32 vendorid; + + vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x00); + + if ((vendorid & 0xffff) == 0x10de) { + dbgp_printk("using nvidia set_debug_port\n"); + set_debug_port = nvidia_set_debug_port; + } +} + +static int __init ehci_setup(void) +{ + struct usb_debug_descriptor dbgp_desc; + u32 cmd, ctrl, status, portsc, hcs_params; + u32 debug_port, new_debug_port = 0, n_ports; + u32 devnum; + int ret, i; + int loop; + int port_map_tried; + int playtimes = 3; + +try_next_time: + port_map_tried = 0; + +try_next_port: + + hcs_params = readl(&ehci_caps->hcs_params); + debug_port = HCS_DEBUG_PORT(hcs_params); + n_ports = HCS_N_PORTS(hcs_params); + + dbgp_printk("debug_port: %d\n", debug_port); + dbgp_printk("n_ports: %d\n", n_ports); + + for (i = 1; i <= n_ports; i++) { + portsc = readl(&ehci_regs->port_status[i-1]); + dbgp_printk("portstatus%d: %08x\n", i, portsc); + } + + if (port_map_tried && (new_debug_port != debug_port)) { + if (--playtimes) { + set_debug_port(new_debug_port); + goto try_next_time; + } + return -1; + } + + loop = 10; + /* Reset the EHCI controller */ + cmd = readl(&ehci_regs->command); + cmd |= CMD_RESET; + writel(cmd, &ehci_regs->command); + do { + cmd = readl(&ehci_regs->command); + } while ((cmd & CMD_RESET) && (--loop > 0)); + + if (!loop) { + dbgp_printk("can not reset ehci\n"); + return -1; + } + dbgp_printk("ehci reset done\n"); + + /* Claim ownership, but do not enable yet */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_OWNER; + ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); + writel(ctrl, &ehci_debug->control); + + /* Start the ehci running */ + cmd = readl(&ehci_regs->command); + cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); + cmd |= CMD_RUN; + writel(cmd, &ehci_regs->command); + + /* Ensure everything is routed to the EHCI */ + writel(FLAG_CF, &ehci_regs->configured_flag); + + /* Wait until the controller is no longer halted */ + loop = 10; + do { + status = readl(&ehci_regs->status); + } while ((status & STS_HALT) && (--loop > 0)); + + if (!loop) { + dbgp_printk("ehci can be started\n"); + return -1; + } + dbgp_printk("ehci started\n"); + + /* Wait for a device to show up in the debug port */ + ret = ehci_wait_for_port(debug_port); + if (ret < 0) { + dbgp_printk("No device found in debug port\n"); + goto next_debug_port; + } + dbgp_printk("ehci wait for port done\n"); + + /* Enable the debug port */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_CLAIM; + writel(ctrl, &ehci_debug->control); + ctrl = readl(&ehci_debug->control); + if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) { + dbgp_printk("No device in debug port\n"); + writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control); + goto err; + } + dbgp_printk("debug ported enabled\n"); + + /* Completely transfer the debug device to the debug controller */ + portsc = readl(&ehci_regs->port_status[debug_port - 1]); + portsc &= ~PORT_PE; + writel(portsc, &ehci_regs->port_status[debug_port - 1]); + + dbgp_mdelay(100); + + /* Find the debug device and make it device number 127 */ + for (devnum = 0; devnum <= 127; devnum++) { + ret = dbgp_control_msg(devnum, + USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0, + &dbgp_desc, sizeof(dbgp_desc)); + if (ret > 0) + break; + } + if (devnum > 127) { + dbgp_printk("Could not find attached debug device\n"); + goto err; + } + if (ret < 0) { + dbgp_printk("Attached device is not a debug device\n"); + goto err; + } + dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint; + + /* Move the device to 127 if it isn't already there */ + if (devnum != USB_DEBUG_DEVNUM) { + ret = dbgp_control_msg(devnum, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0); + if (ret < 0) { + dbgp_printk("Could not move attached device to %d\n", + USB_DEBUG_DEVNUM); + goto err; + } + devnum = USB_DEBUG_DEVNUM; + dbgp_printk("debug device renamed to 127\n"); + } + + /* Enable the debug interface */ + ret = dbgp_control_msg(USB_DEBUG_DEVNUM, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0); + if (ret < 0) { + dbgp_printk(" Could not enable the debug device\n"); + goto err; + } + dbgp_printk("debug interface enabled\n"); + + /* Perform a small write to get the even/odd data state in sync + */ + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1); + if (ret < 0) { + dbgp_printk("dbgp_bulk_write failed: %d\n", ret); + goto err; + } + dbgp_printk("small write doned\n"); + + return 0; +err: + /* Things didn't work so remove my claim */ + ctrl = readl(&ehci_debug->control); + ctrl &= ~(DBGP_CLAIM | DBGP_OUT); + writel(ctrl, &ehci_debug->control); + return -1; + +next_debug_port: + port_map_tried |= (1<<(debug_port - 1)); + new_debug_port = ((debug_port-1+1)%n_ports) + 1; + if (port_map_tried != ((1<> 29) & 0x7; + bar = (bar * 4) + 0xc; + offset = (debug_port >> 16) & 0xfff; + dbgp_printk("bar: %02x offset: %03x\n", bar, offset); + if (bar != PCI_BASE_ADDRESS_0) { + dbgp_printk("only debug ports on bar 1 handled.\n"); + + return -1; + } + + bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset); + if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) { + dbgp_printk("only simple 32bit mmio bars supported\n"); + + return -1; + } + + /* double check if the mem space is enabled */ + byte = read_pci_config_byte(bus, slot, func, 0x04); + if (!(byte & 0x2)) { + byte |= 0x02; + write_pci_config_byte(bus, slot, func, 0x04, byte); + dbgp_printk("mmio for ehci enabled\n"); + } + + /* + * FIXME I don't have the bar size so just guess PAGE_SIZE is more + * than enough. 1K is the biggest I have seen. + */ + set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); + ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); + ehci_bar += bar_val & ~PAGE_MASK; + dbgp_printk("ehci_bar: %p\n", ehci_bar); + + ehci_caps = ehci_bar; + ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); + ehci_debug = ehci_bar + offset; + ehci_dev.bus = bus; + ehci_dev.slot = slot; + ehci_dev.func = func; + + detect_set_debug_port(); + + ret = ehci_setup(); + if (ret < 0) { + dbgp_printk("ehci_setup failed\n"); + ehci_debug = 0; + + return -1; + } + + return 0; +} + +static void early_dbgp_write(struct console *con, const char *str, u32 n) +{ + int chunk, ret; + + if (!ehci_debug) + return; + while (n > 0) { + chunk = n; + if (chunk > DBGP_MAX_PACKET) + chunk = DBGP_MAX_PACKET; + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, + dbgp_endpoint_out, str, chunk); + str += chunk; + n -= chunk; + } +} + +static struct console early_dbgp_console = { + .name = "earlydbg", + .write = early_dbgp_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; +#endif + /* Console interface to a host file on AMD's SimNow! */ static int simnow_fd; @@ -165,6 +889,7 @@ enum { static noinline long simnow(long cmd, long a, long b, long c) { long ret; + asm volatile("cpuid" : "=a" (ret) : "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); @@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c) static void __init simnow_init(char *str) { char *fn = "klog"; + if (*str == '=') fn = ++str; /* error ignored */ @@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...) va_end(ap); } -static int __initdata keep_early; static int __init setup_early_printk(char *buf) { + int keep_early; + if (!buf) return 0; @@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf) return 0; early_console_initialized = 1; - if (strstr(buf, "keep")) - keep_early = 1; + keep_early = (strstr(buf, "keep") != NULL); if (!strncmp(buf, "serial", 6)) { early_serial_init(buf + 6); @@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf) simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; +#ifdef CONFIG_EARLY_PRINTK_DBGP + } else if (!strncmp(buf, "dbgp", 4)) { + if (early_dbgp_init(buf+4) < 0) + return 0; + early_console = &early_dbgp_console; + /* + * usb subsys will reset ehci controller, so don't keep + * that early console + */ + keep_early = 0; +#endif #ifdef CONFIG_HVC_XEN } else if (!strncmp(buf, "xen", 3)) { early_console = &xenboot_console; @@ -251,4 +988,23 @@ static int __init setup_early_printk(char *buf) register_console(early_console); return 0; } + +void __init enable_debug_console(char *buf) +{ +#ifdef DBGP_DEBUG + struct console *old_early_console = NULL; + + if (early_console_initialized && early_console) { + old_early_console = early_console; + unregister_console(early_console); + early_console_initialized = 0; + } + + setup_early_printk(buf); + + if (early_console == old_early_console && old_early_console) + register_console(old_early_console); +#endif +} + early_param("earlyprintk", setup_early_printk); -- cgit v1.2.3 From a4dbc34d181e87a0d724dee365921e9251f831d4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:14:28 -0700 Subject: x86: add setup_ioapic_ids for numaq in x86_quirks Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/numaq_32.c | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f3..72ba06314c7b 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1728,10 +1729,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) return; -#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20a..2434467ddf72 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } +static int __init numaq_setup_ioapic_ids(void) +{ + /* so can skip it */ + return 1; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, -- cgit v1.2.3 From e8c48efdb971cfb1b043076cf68be535d461a20b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:44 -0700 Subject: x86: mach_summit to summit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/summit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f044ba..7b987852e876 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -- cgit v1.2.3 From 39eacc20f93614f7bab63eb1d45060503afc46d0 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 25 Jul 2008 23:30:13 +0800 Subject: arch/x86/kernel/visws_quirks.c: Removed duplicated #include Removed duplicated #include in arch/x86/kernel/visws_quirks.c. asm/apic.h asm/arch_hooks.h asm/io.h asm/visws/cobalt.h asm/visws/lithium.h asm/visws/piix4.h linux/init.h linux/interrupt.h linux/smp.h Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/visws_quirks.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c48..0c75691bcf5f 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -29,41 +29,26 @@ #include #include #include -#include #include #include #include "mach_apic.h" -#include -#include - #include -#include -#include -#include -#include #include #include -#include #include -#include #include #include -#include #include #include extern int no_broadcast; -#include #include -#include -#include -#include char visws_board_type = -1; char visws_board_rev = -1; -- cgit v1.2.3 From 6ffac1e90a17ea0aded5c581204397421eec91b6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 24 Jul 2008 18:07:56 -0700 Subject: x64, fpu: fix possible FPU leakage in error conditions On Thu, Jul 24, 2008 at 03:43:44PM -0700, Linus Torvalds wrote: > So how about this patch as a starting point? This is the RightThing(tm) to > do regardless, and if it then makes it easier to do some other cleanups, > we should do it first. What do you think? restore_fpu_checking() calls init_fpu() in error conditions. While this is wrong(as our main intention is to clear the fpu state of the thread), this was benign before commit 92d140e21f1 ("x86: fix taking DNA during 64bit sigreturn"). Post commit 92d140e21f1, live FPU registers may not belong to this process at this error scenario. In the error condition for restore_fpu_checking() (especially during the 64bit signal return), we are doing init_fpu(), which saves the live FPU register state (possibly belonging to some other process context) into the thread struct (through unlazy_fpu() in init_fpu()). This is wrong and can leak the FPU data. For the signal handler restore error condition in restore_i387(), clear the fpu state present in the thread struct(before ultimately sending a SIGSEGV for badframe). For the paranoid error condition check in math_state_restore(), send a SIGSEGV, if we fail to restore the state. Signed-off-by: Suresh Siddha Cc: Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 11 ++++++++++- arch/x86/kernel/traps_64.c | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd651..ca316b5b742c 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; } /* diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420c..513caaca7115 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - restore_fpu_checking(&me->thread.xstate->fxsave); + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + stts(); + force_sig(SIGSEGV, me); + return; + } task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } -- cgit v1.2.3 From 6524d938b3360504b43a1278b5a8403e85383d1a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:30 -0700 Subject: cpumask: put cpumask_of_cpu_map in the initdata section * Create the cpumask_of_cpu_map statically in the init data section using NR_CPUS but replace it during boot up with one sized by nr_cpu_ids (num possible cpus). Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Jack Steiner Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c006..1cd53dfcd309 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -81,10 +81,12 @@ static void __init setup_per_cpu_maps(void) } #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -cpumask_t *cpumask_of_cpu_map __read_mostly; -EXPORT_SYMBOL(cpumask_of_cpu_map); - -/* requires nr_cpu_ids to be initialized */ +/* + * Replace static cpumask_of_cpu_map in the initdata section, + * with one that's allocated sized by the possible number of cpus. + * + * (requires nr_cpu_ids to be initialized) + */ static void __init setup_cpumask_of_cpu(void) { int i; -- cgit v1.2.3 From 0bc3cc03fa6e1c20aecb5a33356bcaae410640b9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:31 -0700 Subject: cpumask: change cpumask_of_cpu_ptr to use new cpumask_of_cpu * Replace previous instances of the cpumask_of_cpu_ptr* macros with a the new (lvalue capable) generic cpumask_of_cpu(). Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Jack Steiner Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/cstate.c | 3 +-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10 +++------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 +++++---------- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 12 ++++-------- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 3 +-- arch/x86/kernel/cpu/intel_cacheinfo.c | 3 +-- arch/x86/kernel/ldt.c | 6 ++---- arch/x86/kernel/microcode.c | 17 +++++------------ arch/x86/kernel/reboot.c | 11 +++-------- 9 files changed, 25 insertions(+), 55 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 9220cf46aa10..c2502eb9aa83 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, new_mask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ff2fff56f0a8..dd097b835839 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - cpumask_of_cpu_ptr_next(cpu_mask, i); - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } @@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; - cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpu_mask), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 53c7b6936973..c45ca6d4dce1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -479,12 +479,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1017,7 +1016,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1032,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1107,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; - cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1159,8 +1156,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(newmask, pol->cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1182,7 +1178,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = *newmask; + pol->cpus = cpumask_of_cpu(pol->cpu); else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1248,7 +1244,6 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1258,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index ca2ac13b7af2..15e13c01cc36 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - if (!cpus_empty(*covered_cpus)) { - cpumask_of_cpu_ptr_declare(new_mask); - + if (!cpus_empty(*covered_cpus)) for_each_cpu_mask_nr(j, *covered_cpus) { - cpumask_of_cpu_ptr_next(new_mask, j); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, + &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } - } tmp = freqs.new; freqs.new = freqs.old; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2f3728dc24f6..191f7263c61d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - cpumask_of_cpu_ptr(newmask, cpu); - return _speedstep_get(newmask); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 650d40f7912b..6b0a10b002f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; - cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, newmask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3f..b68e21f06f4f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP - cpumask_of_cpu_ptr_declare(mask); - preempt_disable(); load_LDT(pc); - cpumask_of_cpu_ptr_next(mask, smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590e..652fa5c38ebe 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,7 +388,6 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -405,8 +404,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev, if (end == buf) return -EINVAL; if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; + cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 06a9f643817e..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -414,25 +414,20 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; - cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) { + cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) { + if (!cpu_online(reboot_cpu_id)) reboot_cpu_id = smp_processor_id(); - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. -- cgit v1.2.3 From 3964cd3a6721f18ef1dd67b9a0a89dc5b36683b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 19:35:20 +0200 Subject: x86: visws_quirks, fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/visws_quirks.c: In function ‘visws_early_detect’: arch/x86/kernel/visws_quirks.c:290: error: ‘skip_ioapic_setup’ undeclared (first use in this function) arch/x86/kernel/visws_quirks.c:290: error: (Each undeclared identifier is reported only once arch/x86/kernel/visws_quirks.c:290: error: for each function it appears in.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/visws_quirks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 0c75691bcf5f..3059eb45a915 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Jul 2008 19:44:49 -0700 Subject: dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 27 ++++++++++++++++----------- arch/x86/kernel/pci-gart_64.c | 3 +-- arch/x86/kernel/pci-nommu.c | 14 +------------- arch/x86/kernel/pci-swiotlb_64.c | 2 +- 5 files changed, 20 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 19e7fc7c2c4f..1eb86be93d7a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -544,7 +544,7 @@ error: return ret; } -static const struct dma_mapping_ops calgary_dma_ops = { +static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551bb..37544123896d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,7 +11,7 @@ static int forbid_dac __read_mostly; -const struct dma_mapping_ops *dma_ops; +struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -312,6 +312,8 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) int dma_supported(struct device *dev, u64 mask) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -319,8 +321,8 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. @@ -367,6 +369,7 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { + struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory = NULL; struct page *page; unsigned long dma_mask = 0; @@ -435,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Let low level make its own zone decisions */ gfp &= ~(GFP_DMA32|GFP_DMA); - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, dma_handle, gfp); return NULL; } @@ -448,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } } - if (dma_ops->alloc_coherent) { + if (ops->alloc_coherent) { free_pages((unsigned long)memory, get_order(size)); gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + return ops->alloc_coherent(dev, size, dma_handle, gfp); } - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + if (ops->map_simple) { + *dma_handle = ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -477,12 +480,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); + if (ops->unmap_single) + ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d2..744126e64950 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -692,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) extern int agp_amd64_init(void); -static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, +static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff3..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { +struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c20..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } -const struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, -- cgit v1.2.3 From 1956a96de488feb05e95c08c9d5e80f63a4be2b1 Mon Sep 17 00:00:00 2001 From: Alexis Bruemmer Date: Fri, 25 Jul 2008 19:44:51 -0700 Subject: x86 calgary: fix handling of devices that aren't behind the Calgary The calgary code can give drivers addresses above 4GB which is very bad for hardware that is only 32bit DMA addressable. With this patch, the calgary code sets the global dma_ops to swiotlb or nommu properly, and the dma_ops of devices behind the Calgary/CalIOC2 to calgary_dma_ops. So the calgary code can handle devices safely that aren't behind the Calgary/CalIOC2. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alexis Bruemmer Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/pci-calgary_64.c | 71 +++++++++++++++------------------------- 1 file changed, 26 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1eb86be93d7a..b67a4b1d4eae 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -37,6 +37,7 @@ #include #include #include + #include #include #include @@ -413,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev, } } -static int calgary_nontranslate_map_sg(struct device* dev, - struct scatterlist *sg, int nelems, int direction) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nelems, i) { - struct page *p = sg_page(s); - - BUG_ON(!p); - s->dma_address = virt_to_bus(sg_virt(s)); - s->dma_length = s->length; - } - return nelems; -} - static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { @@ -439,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, unsigned long entry; int i; - if (!translation_enabled(tbl)) - return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for_each_sg(sg, s, nelems, i) { BUG_ON(!sg_page(s)); @@ -477,7 +459,6 @@ error: static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t dma_handle = bad_dma_address; void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; @@ -486,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, uaddr = (unsigned long)vaddr; npages = num_dma_pages(uaddr, size); - if (translation_enabled(tbl)) - dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); - else - dma_handle = virt_to_bus(vaddr); - - return dma_handle; + return iommu_alloc(dev, tbl, vaddr, npages, direction); } static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, @@ -500,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - if (!translation_enabled(tbl)) - return; - npages = num_dma_pages(dma_handle, size); iommu_free(tbl, dma_handle, npages); } @@ -525,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, goto error; memset(ret, 0, size); - if (translation_enabled(tbl)) { - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) - goto free; - - *dma_handle = mapping; - } else /* non translated slot */ - *dma_handle = virt_to_bus(ret); - + /* set up tces to cover the allocated range */ + mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); + if (mapping == bad_dma_address) + goto free; + *dma_handle = mapping; return ret; - free: free_pages((unsigned long)ret, get_order(size)); ret = NULL; @@ -1241,6 +1208,16 @@ static int __init calgary_init(void) goto error; } while (1); + dev = NULL; + for_each_pci_dev(dev) { + struct iommu_table *tbl; + + tbl = find_iommu_table(&dev->dev); + + if (translation_enabled(tbl)) + dev->dev.archdata.dma_ops = &calgary_dma_ops; + } + return ret; error: @@ -1262,6 +1239,7 @@ error: calgary_disable_translation(dev); calgary_free_bus(dev); pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ + dev->dev.archdata.dma_ops = NULL; } while (1); return ret; @@ -1503,6 +1481,10 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, debugging ? "enabled" : "disabled"); + + /* swiotlb for devices that aren't behind the Calgary. */ + if (max_pfn > MAX_DMA32_PFN) + swiotlb = 1; } return; @@ -1519,7 +1501,7 @@ int __init calgary_iommu_init(void) { int ret; - if (no_iommu || swiotlb) + if (no_iommu || (swiotlb && !calgary_detected)) return -ENODEV; if (!calgary_detected) @@ -1532,15 +1514,14 @@ int __init calgary_iommu_init(void) if (ret) { printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " "falling back to no_iommu\n", ret); - if (max_pfn > MAX_DMA32_PFN) - printk(KERN_ERR "WARNING more than 4GB of memory, " - "32bit PCI may malfunction.\n"); return ret; } force_iommu = 1; bad_dma_address = 0x0; - dma_ops = &calgary_dma_ops; + /* dma_ops is set to swiotlb or nommu */ + if (!dma_ops) + dma_ops = &nommu_dma_ops; return 0; } -- cgit v1.2.3 From 3ab83521378268044a448113c6aa9a9e245f4d2f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 25 Jul 2008 19:45:07 -0700 Subject: kexec jump This patch provides an enhancement to kexec/kdump. It implements the following features: - Backup/restore memory used by the original kernel before/after kexec. - Save/restore CPU state before/after kexec. The features of this patch can be used as a general method to call program in physical mode (paging turning off). This can be used to call BIOS code under Linux. kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 Usage example of calling some physical mode code and return: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_KEXEC=y CONFIG_PM=y CONFIG_KEXEC_JUMP=y 2. Build patched kexec-tool or download the pre-built one. 3. Build some physical mode executable named such as "phy_mode" 4. Boot kernel compiled in step 1. 5. Load physical mode executable with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context --args-none phy_mode 6. Call physical mode executable with following shell command line: /sbin/kexec -e Implementation point: To support jumping without reserving memory. One shadow backup page (source page) is allocated for each page used by kexeced code image (destination page). When do kexec_load, the image of kexeced code is loaded into source pages, and before executing, the destination pages and the source pages are swapped, so the contents of destination pages are backupped. Before jumping to the kexeced code image and after jumping back to the original kernel, the destination pages and the source pages are swapped too. C ABI (calling convention) is used as communication protocol between kernel and called code. A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to indicate that the loaded kernel image is used for jumping back. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying Acked-by: Vivek Goyal Cc: "Eric W. Biederman" Cc: Pavel Machek Cc: Nigel Cunningham Cc: "Rafael J. Wysocki" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 27 ++++-- arch/x86/kernel/machine_kexec_64.c | 2 +- arch/x86/kernel/relocate_kernel_32.S | 174 ++++++++++++++++++++++++++++++----- 3 files changed, 172 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..2b67609d0a1c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) static u32 kexec_pgd[1024] PAGE_ALIGNED; @@ -85,10 +86,12 @@ static void load_segments(void) * reboot code buffer to allow us to avoid allocations * later. * - * Currently nothing. + * Make control page executable. */ int machine_kexec_prepare(struct kimage *image) { + if (nx_enabled) + set_pages_x(image->control_code_page, 1); return 0; } @@ -98,16 +101,24 @@ int machine_kexec_prepare(struct kimage *image) */ void machine_kexec_cleanup(struct kimage *image) { + if (nx_enabled) + set_pages_nx(image->control_code_page, 1); } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + asmlinkage unsigned long + (*relocate_kernel_ptr)(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); tracer_disable(); @@ -115,10 +126,11 @@ NORET_TYPE void machine_kexec(struct kimage *image) local_irq_disable(); control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE); + memcpy(control_page, relocate_kernel, PAGE_SIZE/2); + relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; + page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_PGD] = __pa(kexec_pgd); page_list[VA_PGD] = (unsigned long)kexec_pgd; #ifdef CONFIG_X86_PAE @@ -131,6 +143,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) page_list[VA_PTE_0] = (unsigned long)kexec_pte0; page_list[PA_PTE_1] = __pa(kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -149,8 +162,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) set_idt(phys_to_virt(0),0); /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); + image->start = relocate_kernel_ptr((unsigned long)image->head, + (unsigned long)page_list, + image->start, cpu_has_pae, + image->preserve_context); } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,11 +20,44 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) +/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are + * used to save some data for jumping back + */ +#define DATA(offset) (PAGE_SIZE/2+(offset)) + +/* Minimal CPU state */ +#define ESP DATA(0x0) +#define CR0 DATA(0x4) +#define CR3 DATA(0x8) +#define CR4 DATA(0xc) + +/* other data */ +#define CP_VA_CONTROL_PAGE DATA(0x10) +#define CP_PA_PGD DATA(0x14) +#define CP_PA_SWAP_PAGE DATA(0x18) +#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) + .text .align PAGE_SIZE .globl relocate_kernel relocate_kernel: - movl 8(%esp), %ebp /* list of pages */ + /* Save the CPU context, used for jumping back */ + + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + pushf + + movl 20+8(%esp), %ebp /* list of pages */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %esp, ESP(%edi) + movl %cr0, %eax + movl %eax, CR0(%edi) + movl %cr3, %eax + movl %eax, CR3(%edi) + movl %cr4, %eax + movl %eax, CR4(%edi) #ifdef CONFIG_X86_PAE /* map the control page at its virtual address */ @@ -138,15 +171,25 @@ relocate_kernel: relocate_new_kernel: /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ + movl 20+4(%esp), %ebx /* page_list */ + movl 20+8(%esp), %ebp /* list of pages */ + movl 20+12(%esp), %edx /* start address */ + movl 20+16(%esp), %ecx /* cpu_has_pae */ + movl 20+20(%esp), %esi /* preserve_context */ /* zero out flags, and disable interrupts */ pushl $0 popfl + /* save some information for jumping back */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %edi, CP_VA_CONTROL_PAGE(%edi) + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, CP_PA_PGD(%edi) + movl PTR(PA_SWAP_PAGE)(%ebp), %eax + movl %eax, CP_PA_SWAP_PAGE(%edi) + movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) + /* get physical address of control page now */ /* this is impossible after page table switch */ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi @@ -197,8 +240,90 @@ identity_mapped: xorl %eax, %eax movl %eax, %cr3 + movl CP_PA_SWAP_PAGE(%edi), %eax + pushl %eax + pushl %ebx + call swap_pages + addl $8, %esp + + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB, it's handy, and not processor dependent. + */ + xorl %eax, %eax + movl %eax, %cr3 + + /* set all of the registers to known values */ + /* leave %esp alone */ + + testl %esi, %esi + jnz 1f + xorl %edi, %edi + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp + ret +1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp +2: + call *%edx + + /* get the re-entry point of the peer system */ + movl 0(%esp), %ebp + call 1f +1: + popl %ebx + subl $(1b - relocate_kernel), %ebx + movl CP_VA_CONTROL_PAGE(%ebx), %edi + lea PAGE_SIZE(%ebx), %esp + movl CP_PA_SWAP_PAGE(%ebx), %eax + movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx + pushl %eax + pushl %edx + call swap_pages + addl $8, %esp + movl CP_PA_PGD(%ebx), %eax + movl %eax, %cr3 + movl %cr0, %eax + orl $(1<<31), %eax + movl %eax, %cr0 + lea PAGE_SIZE(%edi), %esp + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax + ret + +virtual_mapped: + movl CR4(%edi), %eax + movl %eax, %cr4 + movl CR3(%edi), %eax + movl %eax, %cr3 + movl CR0(%edi), %eax + movl %eax, %cr0 + movl ESP(%edi), %esp + movl %ebp, %eax + + popf + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + /* Do the copies */ - movl %ebx, %ecx +swap_pages: + movl 8(%esp), %edx + movl 4(%esp), %ecx + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + movl %ecx, %ebx jmp 1f 0: /* top, read another word from the indirection page */ @@ -226,27 +351,28 @@ identity_mapped: movl %ecx, %esi /* For every source page do a copy */ andl $0xfffff000, %esi + movl %edi, %eax + movl %esi, %ebp + + movl %edx, %edi movl $1024, %ecx rep ; movsl - jmp 0b -3: - - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. - */ - xorl %eax, %eax - movl %eax, %cr3 + movl %ebp, %edi + movl %eax, %esi + movl $1024, %ecx + rep ; movsl - /* set all of the registers to known values */ - /* leave %esp alone */ + movl %eax, %edi + movl %edx, %esi + movl $1024, %ecx + rep ; movsl - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %edi, %edi - xorl %ebp, %ebp + lea PAGE_SIZE(%ebp), %esi + jmp 0b +3: + popl %esi + popl %edi + popl %ebx + popl %ebp ret -- cgit v1.2.3 From 89081d17f7bb81d89fa1aa9b70f821c5cf4d39e9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 25 Jul 2008 19:45:10 -0700 Subject: kexec jump: save/restore device state This patch implements devices state save/restore before after kexec. This patch together with features in kexec_jump patch can be used for following: - A simple hibernation implementation without ACPI support. You can kexec a hibernating kernel, save the memory image of original system and shutdown the system. When resuming, you restore the memory image of original system via ordinary kexec load then jump back. - Kernel/system debug through making system snapshot. You can make system snapshot, jump back, do some thing and make another system snapshot. - Cooperative multi-kernel/system. With kexec jump, you can switch between several kernels/systems quickly without boot process except the first time. This appears like swap a whole kernel/system out/in. - A general method to call program in physical mode (paging turning off). This can be used to invoke BIOS code under Linux. The following user-space tools can be used with kexec jump: - kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 - makedumpfile with patches are used as memory image saving tool, it can exclude free pages from original kernel memory image file. The patches and the precompiled makedumpfile can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-src_cvs_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-patches_cvs_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile_cvs_kh10 - An initramfs image can be used as the root file system of kexeced kernel. An initramfs image built with "BuildRoot" can be downloaded from the following URL: initramfs image: http://khibernation.sourceforge.net/download/release_v10/initramfs/rootfs_cvs_kh10.gz All user space tools above are included in the initramfs image. Usage example of simple hibernation: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_RELOCATABLE=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_KEXEC_JUMP=y 2. Build an initramfs image contains kexec-tool and makedumpfile, or download the pre-built initramfs image, called rootfs.gz in following text. 3. Prepare a partition to save memory image of original kernel, called hibernating partition in following text. 4. Boot kernel compiled in step 1 (kernel A). 5. In the kernel A, load kernel compiled in step 1 (kernel B) with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context /boot/bzImage --mem-min=0x100000 --mem-max=0xffffff --initrd=rootfs.gz 6. Boot the kernel B with following shell command line: /sbin/kexec -e 7. The kernel B will boot as normal kexec. In kernel B the memory image of kernel A can be saved into hibernating partition as follow: jump_back_entry=`cat /proc/cmdline | tr ' ' '\n' | grep kexec_jump_back_entry | cut -d '='` echo $jump_back_entry > kexec_jump_back_entry cp /proc/vmcore dump.elf Then you can shutdown the machine as normal. 8. Boot kernel compiled in step 1 (kernel C). Use the rootfs.gz as root file system. 9. In kernel C, load the memory image of kernel A as follow: /sbin/kexec -l --args-none --entry=`cat kexec_jump_back_entry` dump.elf 10. Jump back to the kernel A as follow: /sbin/kexec -e Then, kernel A is resumed. Implementation point: To support jumping between two kernels, before jumping to (executing) the new kernel and jumping back to the original kernel, the devices are put into quiescent state, and the state of devices and CPU is saved. After jumping back from kexeced kernel and jumping to the new kernel, the state of devices and CPU are restored accordingly. The devices/CPU state save/restore code of software suspend is called to implement corresponding function. Known issues: - Because the segment number supported by sys_kexec_load is limited, hibernation image with many segments may not be load. This is planned to be eliminated by adding a new flag to sys_kexec_load to make a image can be loaded with multiple sys_kexec_load invoking. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying Acked-by: Vivek Goyal Cc: "Eric W. Biederman" Cc: Pavel Machek Cc: Nigel Cunningham Cc: "Rafael J. Wysocki" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 2b67609d0a1c..9fe478d98406 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -125,6 +125,18 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + if (image->preserve_context) { +#ifdef CONFIG_X86_IO_APIC + /* We need to put APICs in legacy mode so that we can + * get timer interrupts in second kernel. kexec/kdump + * paths already have calls to disable_IO_APIC() in + * one form or other. kexec jump path also need + * one. + */ + disable_IO_APIC(); +#endif + } + control_page = page_address(image->control_code_page); memcpy(control_page, relocate_kernel, PAGE_SIZE/2); -- cgit v1.2.3 From 36a033082b5243d45d508c5ccd47a754edbc6821 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 14 Mar 2008 17:46:38 -0700 Subject: x86: tracehook_signal_handler This makes the x86 signal handling code use tracehook_signal_handler() in place of calling into ptrace guts. The call is moved after the sa_mask processing, but there is no other change. This cleanup doesn't matter to existing debuggers, but is the sensible thing: have all facets of the handler setup complete before the debugger inspects the task again. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_32.c | 6 ++++-- arch/x86/kernel/signal_64.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd8933..22aae1683c14 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -558,8 +559,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); @@ -568,6 +567,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + return 0; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd651..3beb2db88c5a 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -444,8 +445,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -453,6 +452,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return ret; -- cgit v1.2.3 From eeea3c3ff8af7f6960a0515d46dff6479bdb91f9 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 16 Mar 2008 23:36:28 -0700 Subject: x86: tracehook syscall This changes x86 syscall tracing to use the new tracehook.h entry points. There is no change, only cleanup. Signed-off-by: Roland McGrath --- arch/x86/kernel/ptrace.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccce85db..19a7d2c40560 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1375,30 +1376,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -static void syscall_trace(struct pt_regs *regs) -{ - if (!(current->ptrace & PT_PTRACED)) - return; - -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} #ifdef CONFIG_X86_32 # define IS_IA32 1 @@ -1432,8 +1409,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; - if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1459,7 +1437,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + tracehook_report_syscall_exit(regs, 0); /* * If TIF_SYSCALL_EMU is set, we only get here because of @@ -1475,6 +1453,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) * system call instruction. */ if (test_thread_flag(TIF_SINGLESTEP) && - (current->ptrace & PT_PTRACED)) + tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) send_sigtrap(current, regs, 0); } -- cgit v1.2.3 From 4dfcbb997aa9f3a6a3ed8c192f0dac28b027e08f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 15:37:09 -0700 Subject: x86 signals: use asm/syscall.h Replace local inlines with the asm/syscall.h interfaces that do the same things. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_64.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3beb2db88c5a..cb7cf0216ab7 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -346,35 +347,6 @@ give_sigsegv: return -EFAULT; } -/* - * Return -1L or the syscall number that @regs is executing. - */ -static long current_syscall(struct pt_regs *regs) -{ - /* - * We always sign-extend a -1 value being set here, - * so this is always either -1L or a syscall number. - */ - return regs->orig_ax; -} - -/* - * Return a value that is -EFOO if the system call in @regs->orig_ax - * returned an error. This only works for @regs from @current. - */ -static long current_syscall_ret(struct pt_regs *regs) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. - */ - return (int) regs->ax; -#endif - return regs->ax; -} - /* * OK, we're invoking a handler */ @@ -386,9 +358,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -511,9 +483,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v1.2.3 From 59e52130f04537d2c80ea44bb007cadd1ad29543 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 19:10:57 -0700 Subject: x86: tracehook: TIF_NOTIFY_RESUME This adds TIF_NOTIFY_RESUME support for x86, both 64-bit and 32-bit. When set, we call tracehook_notify_resume() on the way to user mode. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_32.c | 5 +++++ arch/x86/kernel/signal_64.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 22aae1683c14..4445d26efd47 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -663,5 +663,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + clear_thread_flag(TIF_IRET); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cb7cf0216ab7..d01e3f6ef26d 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -523,6 +523,11 @@ void do_notify_resume(struct pt_regs *regs, void *unused, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -- cgit v1.2.3 From d25ae38b7e005af03843833bbd811ffe8c5f8cb4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 19:39:03 -0700 Subject: x86: add apic probe for genapic 64bit - fix intr_remapping_enabled get assigned later, so need to check that in setup_apic_routing Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 6 ++++++ arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index b3ba969c50d2..6c9bfc9e1e95 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,11 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { + if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (!intr_remapping_enabled) + genapic = &apic_flat; + } + if (genapic == &apic_flat) { if (max_physical_apicid >= 8) genapic = &apic_physflat; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50a..fed9f68efd66 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled) + if (cpu_has_x2apic) return 1; return 0; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd4..958d537b4cc9 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -21,7 +21,7 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + if (cpu_has_x2apic && x2apic_phys) return 1; return 0; -- cgit v1.2.3 From 583323b9d2f624884a8c9563fb5a4d795f39ab07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 27 Jul 2008 21:43:11 +0200 Subject: x86: fix cpu hotplug on 32bit commit 3e9704739daf46a8ba6593d749c67b5f7cd633d2 ("x86: boot secondary cpus through initial_code") causes the kernel to crash when a CPU is brought online after the read only sections have been write protected. The write to initial_code in do_boot_cpu() fails. Move inital_code to .cpuinit.data section. Signed-off-by: Thomas Gleixner Acked-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441caf..a7010c3a377a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP 1: #endif /* CONFIG_SMP */ jmp *(initial_code) -.align 4 -ENTRY(initial_code) - .long i386_start_kernel /* * We depend on ET to be correct. This checks for 287/387. @@ -601,6 +598,11 @@ ignore_int: #endif iret +.section .cpuinit.data,"wa" +.align 4 +ENTRY(initial_code) + .long i386_start_kernel + .section .text /* * Real beginning of normal "text" segment -- cgit v1.2.3 From 8cb22bcb1f3ef70d4d48092e9b057175ad9ec78d Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 18 Jul 2008 16:03:52 -0500 Subject: x86: L3 cache index disable for 2.6.26 New versions of AMD processors have support to disable parts of their L3 caches if too many MCEs are generated by the L3 cache. This patch provides a /sysfs interface under the cache hierarchy to display which caches indices are disabled (if any) and to monitoring applications to disable a cache index. This patch does not set an automatic policy to disable the L3 cache. Policy decisions would need to be made by a RAS handler. This patch merely makes it easier to see what indices are currently disabled. Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 87 ++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ff517f0b8cc4..d6ea50e270e0 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -16,6 +16,7 @@ #include #include +#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -130,6 +131,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; + unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; @@ -251,6 +253,13 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } +static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +{ + if (index < 3) + return; + this_leaf->can_disable = 1; +} + static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; @@ -258,9 +267,12 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - else + if (boot_cpu_data.x86 >= 0x10) + amd_check_l3_disable(index, this_leaf); + + } else cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ @@ -637,6 +649,61 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + int i; + ssize_t ret = 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + for (i = 0; i < 2; i++) { + unsigned int reg; + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, + (reg & 0x30000) >> 16, reg & 0xfff); + + } + return ret; + } + return sprintf(buf, "Feature not enabled\n"); +} + +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + /* write the MSR value */ + unsigned int ret; + unsigned int index, val; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + if (strlen(buf) > 15) + return -EINVAL; + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + return 1; + } + return 0; +} + struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -657,6 +724,8 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); + static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -667,12 +736,10 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, + &cache_disable.attr, NULL }; -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -689,7 +756,15 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - return 0; + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : + 0; + return ret; } static struct sysfs_ops sysfs_ops = { -- cgit v1.2.3 From 7a4983bb5f94f6521aa3236fe5c035cf9bef543f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Jul 2008 13:34:21 +0200 Subject: x86: L3 cache index disable for 2.6.26, cleanups No change in functionality. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 115 ++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d6ea50e270e0..a61c9e399ba9 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -253,14 +253,16 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +static void __cpuinit +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; this_leaf->can_disable = 1; } -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -271,19 +273,20 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le amd_cpuid4(index, &eax, &ebx, &ecx); if (boot_cpu_data.x86 >= 0x10) amd_check_l3_disable(index, this_leaf); - - } else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } else { + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -649,59 +652,63 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - int i; - ssize_t ret = 0; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - for (i = 0; i < 2; i++) { - unsigned int reg; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, - (reg & 0x30000) >> 16, reg & 0xfff); + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + ssize_t ret = 0; + int i; - } - return ret; + if (!this_leaf->can_disable) + return sprintf(buf, "Feature not enabled\n"); + + for (i = 0; i < 2; i++) { + unsigned int reg; + + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", + buf, (reg & 0x30000) >> 16, reg & 0xfff); } - return sprintf(buf, "Feature not enabled\n"); + return ret; } -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +static ssize_t +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - /* write the MSR value */ - unsigned int ret; - unsigned int index, val; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - if (strlen(buf) > 15) - return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) - return -EINVAL; - val |= 0xc0000000; - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); - pci_write_config_dword(dev, 0x1BC + index * 4, val); - return 1; - } - return 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + unsigned int ret, index, val; + + if (!this_leaf->can_disable) + return 0; + + /* write the MSR value */ + + if (strlen(buf) > 15) + return -EINVAL; + + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + + return 1; } struct _cache_attr { -- cgit v1.2.3 From a24e8d36f5fc047dac9af6200322ed393f2e3175 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 22 Jul 2008 13:06:02 -0500 Subject: x86: L3 cache index disable for 2.6.26 On Monday 21 July 2008, Ingo Molnar wrote: > > applied to tip/x86/cpu, thanks Mark. > > > > I've done some coding style fixes for the new functions you've > > introduced, see that commit below. > > -tip testing found the following build failure: > > arch/x86/kernel/built-in.o: In function `show_cache_disable': > intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges' > arch/x86/kernel/built-in.o: In function `store_cache_disable': > intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges' > > please send a delta fix patch against the tip/x86/cpu branch: > > http://people.redhat.com/mingo/tip.git/README > > which has your patch plus the cleanup applied. delta fix patch follows. It removes the dependency on k8_northbridges. -Mark Langsdorf Operating System Research Center AMD Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 43 +++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a61c9e399ba9..a0c6c6ffed46 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -13,10 +13,10 @@ #include #include #include +#include #include #include -#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -135,6 +135,12 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; + unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -655,16 +661,39 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +static struct pci_dev *get_k8_northbridge(int node) +{ + struct pci_dev *dev = NULL; + int i; + + for (i = 0; i <= node; i++) { + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_id[0], dev)); + if (!dev) + break; + } + return dev; +} + static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; ssize_t ret = 0; int i; if (!this_leaf->can_disable) return sprintf(buf, "Feature not enabled\n"); + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + for (i = 0; i < 2; i++) { unsigned int reg; @@ -686,14 +715,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; unsigned int ret, index, val; if (!this_leaf->can_disable) return 0; - /* write the MSR value */ - if (strlen(buf) > 15) return -EINVAL; @@ -704,6 +731,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, return -EINVAL; val |= 0xc0000000; + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); -- cgit v1.2.3 From cdcf772ed163651cacac8098b4974aba7f9e1c73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:20:08 +0200 Subject: x86 l3 cache index disable for 2 6 26 fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 39 ++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a0c6c6ffed46..535d662716de 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -136,9 +136,9 @@ struct _cpuid4_info { }; static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} }; unsigned short num_cache_leaves; @@ -190,9 +190,10 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit +amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -264,7 +265,7 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; - this_leaf->can_disable = 1; + this_leaf->can_disable = 1; } static int @@ -474,7 +475,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -511,7 +512,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -593,7 +594,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -675,7 +676,7 @@ static struct pci_dev *get_k8_northbridge(int node) if (!dev) break; } - return dev; + return dev; } static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) @@ -736,7 +737,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; } - + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); @@ -789,7 +790,7 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } @@ -800,9 +801,9 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, struct _index_kobject *this_leaf = to_object(kobj); ssize_t ret; - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count) : + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : 0; return ret; } -- cgit v1.2.3 From 239bd83104ec6bcba90221d8b0973d2565142ef8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:45:49 +0200 Subject: x86: L3 cache index disable for 2.6.26, fix #2 fix !PCI build failure: arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge': arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id' Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d763d24187c2..1677b55371a5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -135,11 +135,13 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +#ifdef CONFIG_PCI static struct pci_device_id k8_nb_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, {} }; +#endif unsigned short num_cache_leaves; @@ -663,6 +665,7 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +#ifdef CONFIG_PCI static struct pci_dev *get_k8_northbridge(int node) { struct pci_dev *dev = NULL; @@ -679,6 +682,12 @@ static struct pci_dev *get_k8_northbridge(int node) } return dev; } +#else +static struct pci_dev *get_k8_northbridge(int node) +{ + return NULL; +} +#endif static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { -- cgit v1.2.3 From 9a56a0f80b52cb41c5e0add47c7ce0bb2ef25eb0 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:13 +0200 Subject: x86: moved Intel microcode patch loader declarations to seperate header file Intel specific microcode declarations have been moved to a seperate header file. There are no code changes to the code itself and no side effects to other parts. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590e..0d654bd32928 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -93,6 +93,7 @@ #include #include #include +#include MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); -- cgit v1.2.3 From 8e61028dfdc6b8ca996abfe8f9baef6792ea2904 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:14 +0200 Subject: x86: typedef removal Removed typedefs. No functional changes to the code. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 0d654bd32928..74e6a77bf190 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -102,17 +102,17 @@ MODULE_LICENSE("GPL"); #define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof (u32)) #define get_totalsize(mc) \ - (((microcode_t *)mc)->hdr.totalsize ? \ - ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) #define get_datasize(mc) \ - (((microcode_t *)mc)->hdr.datasize ? \ - ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -130,7 +130,7 @@ static struct ucode_cpu_info { unsigned int sig; unsigned int pf; unsigned int rev; - microcode_t *mc; + struct microcode *mc; } ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) @@ -171,7 +171,7 @@ static void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - microcode_header_t *mc_header, int sig, int pf) + struct microcode_header *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -183,7 +183,7 @@ static inline int microcode_update_match(int cpu_num, static int microcode_sanity_check(void *mc) { - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -268,7 +268,7 @@ static int microcode_sanity_check(void *mc) static int get_maching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -355,7 +355,7 @@ static unsigned int user_buffer_size; /* it's size */ static long get_next_ucode(void **mc, long offset) { - microcode_header_t mc_header; + struct microcode_header mc_header; unsigned long total_size; /* No more data */ @@ -497,13 +497,13 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - microcode_header_t *mc_header; + struct microcode_header *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (microcode_header_t *)(buf + offset); + mc_header = (struct microcode_header *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { -- cgit v1.2.3 From c3b71bcec0380836caac9b524fa1585b469b7456 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:15 +0200 Subject: x86: move per CPU microcode structure declaration to header file This structure will be later used by other modules as well and needs therfore to be moved out to a header file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 74e6a77bf190..4e7b2f65fed6 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -125,13 +125,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); -static struct ucode_cpu_info { - int valid; - unsigned int sig; - unsigned int pf; - unsigned int rev; - struct microcode *mc; -} ucode_cpu_info[NR_CPUS]; +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) { -- cgit v1.2.3 From 1abae31096007cc993f67ae2576fe8f812270ad6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:16 +0200 Subject: x86: move microcode.c to microcode_intel.c Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/microcode.c | 855 -------------------------------------- arch/x86/kernel/microcode_intel.c | 855 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 856 insertions(+), 856 deletions(-) delete mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/microcode_intel.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec5..a9be2a0dde80 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 4e7b2f65fed6..000000000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null @@ -1,855 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/253668.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -//#define DEBUG /* pr_debug */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "1.14a" - -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) -#define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) -#define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - unsigned int val[2]; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc = NULL; - uci->valid = 1; - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); - uci->valid = 0; - return; - } - - uci->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); -} - -static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) - return 0; - return 1; -} - -static int microcode_sanity_check(void *mc) -{ - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; - int sum, orig_sum, ext_sigcount = 0, i; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - * return < 0 - error - */ -static int get_maching_microcode(void *mc, int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - void *new_mc; - - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; - - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; - ext_sig++; - } - return 0; -find: - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc); - - memcpy(new_mc, mc, total_size); - uci->mc = new_mc; - return 1; -} - -static void apply_microcode(int cpu) -{ - unsigned long flags; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (uci->mc == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); - return; - } - printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); - uci->rev = val[1]; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) -{ - struct microcode_header mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) -#endif - -static long get_next_ucode_from_buffer(void **mc, const u8 *buf, - unsigned long size, long offset) -{ - struct microcode_header *mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= size) - return 0; - mc_header = (struct microcode_header *)(buf + offset); - total_size = get_totalsize(mc_header); - - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; - } - - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; -} - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int cpu_request_microcode(int cpu) -{ - char name[30]; - struct cpuinfo_x86 *c = &cpu_data(cpu); - const struct firmware *firmware; - const u8 *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { - pr_debug("microcode: data file %s load failed\n", name); - return error; - } - buf = firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_maching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); - } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; - release_firmware(firmware); - - return error; -} - -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed_ptr(current, &old); - return err; -} - -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - uci->valid = 0; - vfree(uci->mc); - uci->mc = NULL; - mutex_unlock(µcode_mutex); -} - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c new file mode 100644 index 000000000000..4e7b2f65fed6 --- /dev/null +++ b/arch/x86/kernel/microcode_intel.c @@ -0,0 +1,855 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ +#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof (u32)) +#define get_totalsize(mc) \ + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) +#define get_datasize(mc) \ + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define sigmatch(s1, s2, p1, p2) \ + (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +/* serialize access to the physical write to MSR 0x79 */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); + +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info(int cpu_num) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu_num); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + unsigned int val[2]; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu_num); + uci->pf = uci->rev = 0; + uci->mc = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + printk(KERN_ERR "microcode: CPU%d not a capable Intel " + "processor\n", cpu_num); + uci->valid = 0; + return; + } + + uci->sig = cpuid_eax(0x00000001); + + if ((c->x86_model >= 5) || (c->x86 > 6)) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + uci->pf = 1 << ((val[1] >> 18) & 7); + } + + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); + pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", + uci->sig, uci->pf, uci->rev); +} + +static inline int microcode_update_match(int cpu_num, + struct microcode_header *mc_header, int sig, int pf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + if (!sigmatch(sig, uci->sig, pf, uci->pf) + || mc_header->rev <= uci->rev) + return 0; + return 1; +} + +static int microcode_sanity_check(void *mc) +{ + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + struct extended_signature *ext_sig; + unsigned long total_size, data_size, ext_table_size; + int sum, orig_sum, ext_sigcount = 0, i; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + if (data_size + MC_HEADER_SIZE > total_size) { + printk(KERN_ERR "microcode: error! " + "Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + printk(KERN_ERR "microcode: error! " + "Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + printk(KERN_ERR "microcode: error! " + "Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + printk(KERN_ERR "microcode: error! " + "Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + printk(KERN_WARNING "microcode: aborting, " + "bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + * return < 0 - error + */ +static int get_maching_microcode(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + void *new_mc; + + if (microcode_update_match(cpu, mc_header, + mc_header->sig, mc_header->pf)) + goto find; + + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + for (i = 0; i < ext_sigcount; i++) { + if (microcode_update_match(cpu, mc_header, + ext_sig->sig, ext_sig->pf)) + goto find; + ext_sig++; + } + return 0; +find: + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + new_mc = vmalloc(total_size); + if (!new_mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + + /* free previous update file */ + vfree(uci->mc); + + memcpy(new_mc, mc, total_size); + uci->mc = new_mc; + return 1; +} + +static void apply_microcode(int cpu) +{ + unsigned long flags; + unsigned int val[2]; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc == NULL) + return; + + /* serialize access to the physical write to MSR 0x79 */ + spin_lock_irqsave(µcode_update_lock, flags); + + /* write microcode via MSR 0x79 */ + wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) uci->mc->bits, + (unsigned long) uci->mc->bits >> 16 >> 16); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + spin_unlock_irqrestore(µcode_update_lock, flags); + if (val[1] != uci->mc->hdr.rev) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + return; + } + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x, date = %08x \n", + cpu_num, uci->rev, val[1], uci->mc->hdr.date); + uci->rev = val[1]; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static void __user *user_buffer; /* user area microcode data buffer */ +static unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode(void **mc, long offset) +{ + struct microcode_header mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(total_size); + if (!*mc) + return -ENOMEM; + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +static long get_next_ucode_from_buffer(void **mc, const u8 *buf, + unsigned long size, long offset) +{ + struct microcode_header *mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= size) + return 0; + mc_header = (struct microcode_header *)(buf + offset); + total_size = get_totalsize(mc_header); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(total_size); + if (!*mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + memcpy(*mc, buf + offset, total_size); + return offset + total_size; +} + +/* fake device for request_firmware */ +static struct platform_device *microcode_pdev; + +static int cpu_request_microcode(int cpu) +{ + char name[30]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + const struct firmware *firmware; + const u8 *buf; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + sprintf(name,"intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + error = request_firmware(&firmware, name, µcode_pdev->dev); + if (error) { + pr_debug("microcode: data file %s load failed\n", name); + return error; + } + buf = firmware->data; + size = firmware->size; + while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) + > 0) { + error = microcode_sanity_check(mc); + if (error) + break; + error = get_maching_microcode(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) + vfree(mc); + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + unsigned int val[2]; + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed_ptr(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) + err = -EINVAL; + + if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + if (uci->pf != (1 << ((val[1] >> 18) & 7))) + err = -EINVAL; + } + + if (!err) { + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (uci->rev != val[1]) + err = -EINVAL; + } + + if (!err) + apply_microcode(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " sig=0x%x, pf=0x%x, rev=0x%x\n", + cpu, uci->sig, uci->pf, uci->rev); + + set_cpus_allowed_ptr(current, &old); + return err; +} + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc); + uci->mc = NULL; + mutex_unlock(µcode_mutex); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) -- cgit v1.2.3 From 3e135d887c973b525d43fbb67dfc5972694882f6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:17 +0200 Subject: x86: code split to two parts Split off existing code into two seperate files. One file holds general code, the other file vendor specific parts. No functional changes, only refactoring. Temporarily Introduced a new module name 'ucode' for result, due to already taken name 'microcode'. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 +- arch/x86/kernel/microcode.c | 463 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/microcode_intel.c | 387 ++----------------------------- 3 files changed, 488 insertions(+), 365 deletions(-) create mode 100644 arch/x86/kernel/microcode.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a9be2a0dde80..abb32aed7f4b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode_intel.o +obj-$(CONFIG_MICROCODE) += ucode.o +ucode-objs := microcode.o microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c new file mode 100644 index 000000000000..c1047d7f7ede --- /dev/null +++ b/arch/x86/kernel/microcode.c @@ -0,0 +1,463 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +extern long get_next_ucode(void **mc, long offset); +extern int microcode_sanity_check(void *mc); +extern int get_matching_microcode(void *mc, int cpu); +extern void collect_cpu_info(int cpu_num); +extern int cpu_request_microcode(int cpu); +extern void microcode_fini_cpu(int cpu); +extern void apply_microcode(int cpu); +extern int apply_microcode_check_cpu(int cpu); + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +void __user *user_buffer; /* user area microcode data buffer */ +unsigned int user_buffer_size; /* it's size */ + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +/* fake device for request_firmware */ +struct platform_device *microcode_pdev; + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 4e7b2f65fed6..eded0a154ea8 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -95,18 +95,16 @@ #include #include -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" - #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode *)mc)->hdr.totalsize ? \ ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) @@ -123,11 +121,11 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +extern struct mutex microcode_mutex; -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -static void collect_cpu_info(int cpu_num) +void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -140,7 +138,7 @@ static void collect_cpu_info(int cpu_num) uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { + cpu_has(c, X86_FEATURE_IA64)) { printk(KERN_ERR "microcode: CPU%d not a capable Intel " "processor\n", cpu_num); uci->valid = 0; @@ -175,7 +173,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -static int microcode_sanity_check(void *mc) +int microcode_sanity_check(void *mc) { struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +257,7 @@ static int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -static int get_maching_microcode(void *mc, int cpu) +int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header *mc_header = mc; @@ -288,7 +286,7 @@ static int get_maching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +301,7 @@ find: return 1; } -static void apply_microcode(int cpu) +void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -344,10 +342,10 @@ static void apply_microcode(int cpu) } #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ -static long get_next_ucode(void **mc, long offset) +long get_next_ucode(void **mc, long offset) { struct microcode_header mc_header; unsigned long total_size; @@ -375,117 +373,6 @@ static long get_next_ucode(void **mc, long offset) } return offset + total_size; } - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) #endif static long get_next_ucode_from_buffer(void **mc, const u8 *buf, @@ -515,9 +402,9 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, } /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +extern struct platform_device *microcode_pdev; -static int cpu_request_microcode(int cpu) +int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -530,7 +417,7 @@ static int cpu_request_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, µcode_pdev->dev); if (error) { @@ -544,7 +431,7 @@ static int cpu_request_microcode(int cpu) error = microcode_sanity_check(mc); if (error) break; - error = get_maching_microcode(mc, cpu); + error = get_matching_microcode(mc, cpu); if (error < 0) break; /* @@ -566,7 +453,7 @@ static int cpu_request_microcode(int cpu) return error; } -static int apply_microcode_check_cpu(int cpu) +int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -615,241 +502,13 @@ static int apply_microcode_check_cpu(int cpu) return err; } -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) +void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - vfree(uci->mc); + kfree(uci->mc); uci->mc = NULL; mutex_unlock(µcode_mutex); } - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) -- cgit v1.2.3 From d4ee36686853d5714437c4409f17ad42bfaf4211 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:18 +0200 Subject: x86: structure declaration renaming Renamed common structures to vendor specific naming scheme so other vendors will be able to use the same naming convention. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index eded0a154ea8..ca9861bf067e 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -100,17 +100,19 @@ MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + #define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -134,7 +136,7 @@ void collect_cpu_info(int cpu_num) /* We should bind the task to the CPU */ BUG_ON(raw_smp_processor_id() != cpu_num); uci->pf = uci->rev = 0; - uci->mc = NULL; + uci->mc.mc_intel = NULL; uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -163,7 +165,7 @@ void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) + struct microcode_header_intel *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +177,7 @@ static inline int microcode_update_match(int cpu_num, int microcode_sanity_check(void *mc) { - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -260,7 +262,7 @@ int microcode_sanity_check(void *mc) int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -294,10 +296,10 @@ find: } /* free previous update file */ - vfree(uci->mc); + vfree(uci->mc.mc_intel); memcpy(new_mc, mc, total_size); - uci->mc = new_mc; + uci->mc.mc_intel = new_mc; return 1; } @@ -311,7 +313,7 @@ void apply_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc == NULL) + if (uci->mc.mc_intel == NULL) return; /* serialize access to the physical write to MSR 0x79 */ @@ -319,8 +321,8 @@ void apply_microcode(int cpu) /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); + (unsigned long) uci->mc.mc_intel->bits, + (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ @@ -330,14 +332,14 @@ void apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { + if (val[1] != uci->mc.mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); + cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); uci->rev = val[1]; } @@ -347,7 +349,7 @@ extern unsigned int user_buffer_size; /* it's size */ long get_next_ucode(void **mc, long offset) { - struct microcode_header mc_header; + struct microcode_header_intel mc_header; unsigned long total_size; /* No more data */ @@ -378,13 +380,13 @@ long get_next_ucode(void **mc, long offset) static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - struct microcode_header *mc_header; + struct microcode_header_intel *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (struct microcode_header *)(buf + offset); + mc_header = (struct microcode_header_intel *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { @@ -463,7 +465,7 @@ int apply_microcode_check_cpu(int cpu) int err = 0; /* Check if the microcode is available */ - if (!uci->mc) + if (!uci->mc.mc_intel) return 0; old = current->cpus_allowed; @@ -508,7 +510,7 @@ void microcode_fini_cpu(int cpu) mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc); - uci->mc = NULL; + kfree(uci->mc.mc_intel); + uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } -- cgit v1.2.3 From 8d86f390d9bb5b39f0a315838d1616de6363e1b9 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:21 +0200 Subject: x86: major refactoring Refactored code by introducing a two-module solution. There is one general module in which vendor specific modules can hook into. However, that is exclusive, there is only one vendor specific module allowed at a time. A CPU vendor check makes sure only the correct module for the underlying system gets called. Functinally in terms of patch loading itself there are no changes. This refactoring provides a basis for future implementations of other vendors' patch loaders. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/microcode.c | 80 +++++++++++++++++++++++---------------- arch/x86/kernel/microcode_intel.c | 50 +++++++++++++++++++----- 3 files changed, 89 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index abb32aed7f4b..f2f9f6da8c18 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,8 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += ucode.o -ucode-objs := microcode.o microcode_intel.o +obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index c1047d7f7ede..1e42e79ca694 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -99,25 +99,22 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" +#define MICROCODE_VERSION "2.00" -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -DEFINE_MUTEX(microcode_mutex); +struct microcode_ops *microcode_ops; -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); +EXPORT_SYMBOL_GPL(microcode_mutex); -extern long get_next_ucode(void **mc, long offset); -extern int microcode_sanity_check(void *mc); -extern int get_matching_microcode(void *mc, int cpu); -extern void collect_cpu_info(int cpu_num); -extern int cpu_request_microcode(int cpu); -extern void microcode_fini_cpu(int cpu); -extern void apply_microcode(int cpu); -extern int apply_microcode_check_cpu(int cpu); +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -void __user *user_buffer; /* user area microcode data buffer */ -unsigned int user_buffer_size; /* it's size */ +static void __user *user_buffer; /* user area microcode data buffer */ +EXPORT_SYMBOL_GPL(user_buffer); +static unsigned int user_buffer_size; /* it's size */ +EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) { @@ -130,8 +127,8 @@ static int do_microcode_update (void) old = current->cpus_allowed; - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); + while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* @@ -145,11 +142,12 @@ static int do_microcode_update (void) continue; cpumask_of_cpu_ptr_next(newmask, cpu); set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); + error = microcode_ops->get_matching_microcode(new_mc, + cpu); if (error < 0) goto out; if (error == 1) - apply_microcode(cpu); + microcode_ops->apply_microcode(cpu); } vfree(new_mc); } @@ -232,7 +230,8 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -struct platform_device *microcode_pdev; +static struct platform_device *microcode_pdev; +EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) { @@ -244,9 +243,9 @@ static void microcode_init_cpu(int cpu, int resume) set_cpus_allowed_ptr(current, newmask); mutex_lock(µcode_mutex); - collect_cpu_info(cpu); + microcode_ops->collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); + microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); set_cpus_allowed_ptr(current, &old); } @@ -274,7 +273,7 @@ static ssize_t reload_store(struct sys_device *dev, mutex_lock(µcode_mutex); if (uci->valid) - err = cpu_request_microcode(cpu); + err = microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); put_online_cpus(); set_cpus_allowed_ptr(current, &old); @@ -349,7 +348,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } @@ -362,7 +361,7 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ - apply_microcode(0); + microcode_ops->apply_microcode(0); return 0; } @@ -382,7 +381,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_CANCELED_FROZEN: /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); break; case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -390,9 +389,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) break; case CPU_ONLINE_FROZEN: /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { + if (microcode_ops->apply_microcode_check_cpu(cpu)) { /* The application of microcode failed */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); __mc_sysdev_add(sys_dev, 1); break; } @@ -416,12 +415,17 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int __init microcode_init (void) +static int microcode_init(void *opaque, struct module *module) { + struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + if (microcode_ops) { + printk(KERN_ERR "microcode: already loaded the other module\n"); + return -EEXIST; + } + + microcode_ops = ops; error = microcode_dev_init(); if (error) @@ -443,8 +447,15 @@ static int __init microcode_init (void) } register_hotcpu_notifier(&mc_cpu_notifier); + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION + " " + " \n"); + return 0; } +EXPORT_SYMBOL_GPL(microcode_init); static void __exit microcode_exit (void) { @@ -457,7 +468,10 @@ static void __exit microcode_exit (void) put_online_cpus(); platform_device_unregister(microcode_pdev); -} -module_init(microcode_init) -module_exit(microcode_exit) + microcode_ops = NULL; + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} +EXPORT_SYMBOL_GPL(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index ca9861bf067e..831db5363dba 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -127,7 +127,7 @@ extern struct mutex microcode_mutex; extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -void collect_cpu_info(int cpu_num) +static void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +175,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -int microcode_sanity_check(void *mc) +static int microcode_sanity_check(void *mc) { struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +259,7 @@ int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -int get_matching_microcode(void *mc, int cpu) +static int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_intel *mc_header = mc; @@ -288,7 +288,8 @@ int get_matching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); + " version 0x%x (current=0x%x)\n", + cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +304,7 @@ find: return 1; } -void apply_microcode(int cpu) +static void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -347,7 +348,7 @@ void apply_microcode(int cpu) extern void __user *user_buffer; /* user area microcode data buffer */ extern unsigned int user_buffer_size; /* it's size */ -long get_next_ucode(void **mc, long offset) +static long get_next_ucode(void **mc, long offset) { struct microcode_header_intel mc_header; unsigned long total_size; @@ -406,7 +407,7 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, /* fake device for request_firmware */ extern struct platform_device *microcode_pdev; -int cpu_request_microcode(int cpu) +static int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -455,7 +456,7 @@ int cpu_request_microcode(int cpu) return error; } -int apply_microcode_check_cpu(int cpu) +static int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -504,13 +505,42 @@ int apply_microcode_check_cpu(int cpu) return err; } -void microcode_fini_cpu(int cpu) +static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc.mc_intel); + vfree(uci->mc.mc_intel); uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } + +static struct microcode_ops microcode_intel_ops = { + .get_next_ucode = get_next_ucode, + .get_matching_microcode = get_matching_microcode, + .microcode_sanity_check = microcode_sanity_check, + .apply_microcode_check_cpu = apply_microcode_check_cpu, + .cpu_request_microcode = cpu_request_microcode, + .collect_cpu_info = collect_cpu_info, + .apply_microcode = apply_microcode, + .microcode_fini_cpu = microcode_fini_cpu, +}; + +static int __init microcode_intel_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + if (c->x86_vendor == X86_VENDOR_INTEL) + return microcode_init(µcode_intel_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_intel_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_intel_module_init) +module_exit(microcode_intel_module_exit) -- cgit v1.2.3 From 80cc9f1020f49c9e5b50898c102fd444de70a0a3 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:22 +0200 Subject: x86: AMD microcode patch loading support This patch introduces microcode patch loading for AMD processors. It is based on previous corresponding work for Intel processors. It hooks into the general patch loading module. Main difference is that a container file format is used to hold all patch data for multiple processors as well as an equivalent CPU table, which comes seperately, as opposed to Intel's microcode patching solution. Kconfig and Makefile have been changed provice config and build option for new source file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/microcode_amd.c | 521 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 522 insertions(+) create mode 100644 arch/x86/kernel/microcode_amd.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f2f9f6da8c18..be454f348c3b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o +obj-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c new file mode 100644 index 000000000000..db199e3fdd1f --- /dev/null +++ b/arch/x86/kernel/microcode_amd.c @@ -0,0 +1,521 @@ +/* + * AMD CPU Microcode Update Driver for Linux + * Copyright (C) 2008 Advanced Micro Devices Inc. + * + * Author: Peter Oruba + * + * Based on work by: + * Tigran Aivazian + * + * This driver allows to upgrade microcode on AMD + * family 0x10 and 0x11 processors. + * + * Licensed unter the terms of the GNU General Public + * License version 2. See file COPYING for details. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("AMD Microcode Update Driver"); +MODULE_AUTHOR("Peter Oruba "); +MODULE_LICENSE("GPLv2"); + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define UCODE_MAX_SIZE (2048) +#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DWSIZE (sizeof(u32)) +/* For now we support a fixed ucode total size only */ +#define get_totalsize(mc) \ + ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + + MC_HEADER_SIZE) + +extern int microcode_init(void *opaque, struct module *module); +extern void microcode_exit(void); + +/* serialize access to the physical write */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +extern struct mutex (microcode_mutex); + +struct equiv_cpu_entry *equiv_cpu_table; + +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + uci->rev = 0; + uci->pf = 0; + uci->mc.mc_amd = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", + cpu); + uci->valid = 0; + return; + } + + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (uci->rev) + : "i" (0x0000008B) : "ecx"); + + printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", + uci->rev); +} + +static int get_matching_microcode_amd(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header_amd *mc_header = mc; + unsigned long total_size = get_totalsize(mc_header); + void *new_mc; + struct pci_dev *nb_pci_dev, *sb_pci_dev; + unsigned int current_cpu_id; + unsigned int equiv_cpu_id = 0x00; + unsigned int i = 0; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + /* This is a tricky part. We might be called from a write operation */ + /* to the device file instead of the usual process of firmware */ + /* loading. This routine needs to be able to distinguish both */ +/* cases. This is done by checking if there alread is a equivalent */ + /* CPU table installed. If not, we're written through */ + /* /dev/cpu/microcode. */ +/* Since we ignore all checks. The error case in which going through */ +/* firmware loading and that table is not loaded has already been */ + /* checked earlier. */ + if (equiv_cpu_table == NULL) { + printk(KERN_INFO "microcode: CPU%d microcode update with " + "version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + goto out; + } + + current_cpu_id = cpuid_eax(0x00000001); + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { + equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; + break; + } + i++; + } + + if (!equiv_cpu_id) { + printk(KERN_ERR "microcode: CPU%d cpu_id " + "not found in equivalent cpu table \n", cpu); + return 0; + } + + if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { + printk(KERN_ERR + "microcode: CPU%d patch does not match " + "(patch is %x, cpu extended is %x) \n", + cpu, mc_header->processor_rev_id[0], + (equiv_cpu_id & 0xff)); + return 0; + } + + if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { + printk(KERN_ERR "microcode: CPU%d patch does not match " + "(patch is %x, cpu base id is %x) \n", + cpu, mc_header->processor_rev_id[1], + ((equiv_cpu_id >> 16) & 0xff)); + + return 0; + } + + /* ucode may be northbridge specific */ + if (mc_header->nb_dev_id) { + nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->nb_dev_id & 0xff), + NULL); + if ((!nb_pci_dev) || + (mc_header->nb_rev_id != nb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); + pci_dev_put(nb_pci_dev); + return 0; + } + pci_dev_put(nb_pci_dev); + } + + /* ucode may be southbridge specific */ + if (mc_header->sb_dev_id) { + sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->sb_dev_id & 0xff), + NULL); + if ((!sb_pci_dev) || + (mc_header->sb_rev_id != sb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); + pci_dev_put(sb_pci_dev); + return 0; + } + pci_dev_put(sb_pci_dev); + } + + if (mc_header->patch_id <= uci->rev) + return 0; + + printk(KERN_INFO "microcode: CPU%d found a matching microcode " + "update with version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + +out: + new_mc = vmalloc(UCODE_MAX_SIZE); + if (!new_mc) { + printk(KERN_ERR "microcode: error, can't allocate memory\n"); + return -ENOMEM; + } + memset(new_mc, 0, UCODE_MAX_SIZE); + + /* free previous update file */ + vfree(uci->mc.mc_amd); + + memcpy(new_mc, mc, total_size); + + uci->mc.mc_amd = new_mc; + return 1; +} + +static void apply_microcode_amd(int cpu) +{ + unsigned long flags; + unsigned int eax, edx; + unsigned int rev; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc.mc_amd == NULL) + return; + + spin_lock_irqsave(µcode_update_lock, flags); + + edx = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) >> 32); + eax = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL); + + asm volatile("movl %0, %%ecx; wrmsr" : + : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); + + /* get patch id after patching */ + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + spin_unlock_irqrestore(µcode_update_lock, flags); + + /* check current patch id and patch's id for match */ + if (rev != uci->mc.mc_amd->hdr.patch_id) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, + uci->mc.mc_amd->hdr.patch_id, rev); + return; + } + + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x \n", + cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id); + + uci->rev = rev; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode_amd(void **mc, long offset) +{ + struct microcode_header_amd mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) + return -ENOMEM; + memset(*mc, 0, UCODE_MAX_SIZE); + + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} +#else +#define get_next_ucode_amd() NULL +#endif + +static long get_next_ucode_from_buffer_amd(void **mc, void *buf, + unsigned long size, long offset) +{ + struct microcode_header_amd *mc_header; + unsigned long total_size; + unsigned char *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[offset] != UCODE_UCODE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode payload type field\n"); + return -EINVAL; + } + + mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); + + total_size = (unsigned long) (buf_pos[offset+4] + + (buf_pos[offset+5] << 8)); + + printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", + size, total_size, offset); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) { + printk(KERN_ERR "microcode: error! " + "Can not allocate memory for microcode patch\n"); + return -ENOMEM; + } + + memset(*mc, 0, UCODE_MAX_SIZE); + memcpy(*mc, buf + offset + 8, total_size); + + return offset + total_size + 8; +} + +static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) +{ + unsigned int *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table type field\n"); + return 0; + } + + if (size == 0) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table length\n"); + return 0; + } + + equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); + if (!equiv_cpu_table) { + printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); + return 0; + } + + memset(equiv_cpu_table, 0, size); + memcpy(equiv_cpu_table, &buf_pos[3], size); + + return size + 12; /* add header length */ +} + +/* fake device for request_firmware */ +extern struct platform_device *microcode_pdev; + +static int cpu_request_microcode_amd(int cpu) +{ + char name[30]; + const struct firmware *firmware; + void *buf; + unsigned int *buf_pos; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + sprintf(name, "amd-ucode/microcode_amd.bin"); + error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin", + µcode_pdev->dev); + if (error) { + printk(KERN_ERR "microcode: ucode data file %s load failed\n", + name); + return error; + } + + buf_pos = buf = firmware->data; + size = firmware->size; + + if (buf_pos[0] != UCODE_MAGIC) { + printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n"); + return -EINVAL; + } + + offset = install_equiv_cpu_table(buf, buf_pos[2], offset); + + if (!offset) { + printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); + return -EINVAL; + } + + while ((offset = + get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) { + error = get_matching_microcode_amd(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode_amd(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) { + vfree(mc); + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; + } + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int rev; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc.mc_amd) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16) + err = -EINVAL; + + if (!err) { + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + if (uci->rev != rev) + err = -EINVAL; + } + + if (!err) + apply_microcode_amd(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " rev=0x%x\n", + cpu, uci->rev); + + set_cpus_allowed(current, old); + return err; +} + +static void microcode_fini_cpu_amd(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc.mc_amd); + uci->mc.mc_amd = NULL; + mutex_unlock(µcode_mutex); +} + +static struct microcode_ops microcode_amd_ops = { + .get_next_ucode = get_next_ucode_amd, + .get_matching_microcode = get_matching_microcode_amd, + .microcode_sanity_check = NULL, + .apply_microcode_check_cpu = apply_microcode_check_cpu_amd, + .cpu_request_microcode = cpu_request_microcode_amd, + .collect_cpu_info = collect_cpu_info_amd, + .apply_microcode = apply_microcode_amd, + .microcode_fini_cpu = microcode_fini_cpu_amd, +}; + +static int __init microcode_amd_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + equiv_cpu_table = NULL; + if (c->x86_vendor == X86_VENDOR_AMD) + return microcode_init(µcode_amd_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_amd_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_amd_module_init) +module_exit(microcode_amd_module_exit) -- cgit v1.2.3 From e56b3bc7942982ac2589c942fb345e38bc7a341a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Jul 2008 11:32:33 -0700 Subject: cpu masks: optimize and clean up cpumask_of_cpu() Clean up and optimize cpumask_of_cpu(), by sharing all the zero words. Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns creating a huge array of constant bitmasks, realize that the zero words can be shared. In other words, on a 64-bit architecture, we only ever need 64 of these arrays - with a different bit set in one single world (with enough zero words around it so that we can create any bitmask by just offsetting in that big array). And then we just put enough zeroes around it that we can point every single cpumask to be one of those things. So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each, with one bit set in each array - 2MB memory total), we have exactly 64 arrays instead, each 8k bits in size (64kB total). And then we just point cpumask(n) to the right position (which we can calculate dynamically). Once we have the right arrays, getting "cpumask(n)" ends up being: static inline const cpumask_t *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return (const cpumask_t *)p; } This brings other advantages and simplifications as well: - we are not wasting memory that is just filled with a single bit in various different places - we don't need all those games to re-create the arrays in some dense format, because they're already going to be dense enough. if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory is a non-issue (especially since by doing this "overlapping" trick we probably get better cache behaviour anyway). [ mingo@elte.hu: Converted Linus's mails into a commit. See: http://lkml.org/lkml/2008/7/27/156 http://lkml.org/lkml/2008/7/28/320 Also applied a family filter - which also has the side-effect of leaving out the bits where Linus calls me an idio... Oh, never mind ;-) ] Signed-off-by: Ingo Molnar Cc: Rusty Russell Cc: Andrew Morton Cc: Al Viro Cc: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1cd53dfcd309..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -/* - * Replace static cpumask_of_cpu_map in the initdata section, - * with one that's allocated sized by the possible number of cpus. - * - * (requires nr_cpu_ids to be initialized) - */ -static void __init setup_cpumask_of_cpu(void) -{ - int i; - - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); -} -#else -static inline void setup_cpumask_of_cpu(void) { } -#endif - #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void) /* Setup node to cpumask map */ setup_node_to_cpumask_map(); - - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); } #endif -- cgit v1.2.3 From 45b1e23eca1c53fa79a611a2bc8c93697ede6c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:42:17 +0200 Subject: x86, microcode support: fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/microcode.c:412: error: static declaration of ‘microcode_init’ follows non-static declaration include/asm/microcode.h:1: error: previous declaration of ‘microcode_init’ was here arch/x86/kernel/microcode.c:454: error: static declaration of ‘microcode_exit’ follows non-static declaration include/asm/microcode.h:2: error: previous declaration of ‘microcode_exit’ was here Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 1e42e79ca694..9a881845b35b 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -415,7 +415,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int microcode_init(void *opaque, struct module *module) +int microcode_init(void *opaque, struct module *module) { struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; @@ -457,7 +457,7 @@ static int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -static void __exit microcode_exit (void) +void __exit microcode_exit (void) { microcode_dev_exit(); -- cgit v1.2.3 From 224e946b81166d732f3e9b414cb69612072fb500 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:52:55 +0200 Subject: x86, microcode: fix symbol exports fix tons of build errors: arch/x86/kernel/built-in.o: In function `microcode_fini_cpu': microcode_intel.c:(.text+0x11598): undefined reference to `microcode_mutex' microcode_intel.c:(.text+0x115a4): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115ae): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115bc): undefined reference to `microcode_mutex' [...] Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 9a881845b35b..758b958a6637 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -104,16 +104,16 @@ MODULE_LICENSE("GPL"); struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +DEFINE_MUTEX(microcode_mutex); EXPORT_SYMBOL_GPL(microcode_mutex); -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ +void __user *user_buffer; /* user area microcode data buffer */ EXPORT_SYMBOL_GPL(user_buffer); -static unsigned int user_buffer_size; /* it's size */ +unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) @@ -230,7 +230,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +struct platform_device *microcode_pdev; EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) -- cgit v1.2.3 From 5d7b605245b1aa1a9cd6549b1f57d69273eb0c37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 10:07:36 +0200 Subject: x86, microcode: fix module license string fix: FATAL: modpost: GPL-incompatible module microcode_amd.ko uses GPL-only symbol 'set_cpus_allowed_ptr' Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index db199e3fdd1f..fd9e68e88899 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -40,7 +40,7 @@ MODULE_DESCRIPTION("AMD Microcode Update Driver"); MODULE_AUTHOR("Peter Oruba "); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); #define UCODE_MAGIC 0x00414d44 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -- cgit v1.2.3 From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Jul 2008 13:38:53 +0900 Subject: generic, x86: fix add iommu_num_pages helper function This IOMMU helper function doesn't work for some architectures: http://marc.info/?l=linux-kernel&m=121699304403202&w=2 It also breaks POWER and SPARC builds: http://marc.info/?l=linux-kernel&m=121730388001890&w=2 Currently, only x86 IOMMUs use this so let's move it to x86 for now. Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8dbffb846de9..87d4d6964ec2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* -- cgit v1.2.3 From 0d39741a27d86d305cc75ba626392be410dcbab9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 29 Jul 2008 22:34:01 -0700 Subject: GRU Driver: export is_uv_system(), zap_page_range() & follow_page() Exports needed by the GRU driver. Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/genapic_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..eaff0bbb1444 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -99,3 +99,4 @@ int is_uv_system(void) { return uv_system_type != UV_NONE; } +EXPORT_SYMBOL_GPL(is_uv_system); -- cgit v1.2.3 From a648bf4632628c787abb0514277f2a231fca39ca Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:18 -0700 Subject: x86, xsave: xsave cpuid feature bits Add xsave CPU feature bits. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 0bf4d37a0483..741547225659 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -46,7 +46,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "xsave", NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", -- cgit v1.2.3 From dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:19 -0700 Subject: x86, xsave: enable xsave/xrstor on cpus with xsave support Enables xsave/xrstor by turning on cr4.osxsave on cpu's which have the xsave support. For now, features that OS supports/enabled are FP and SSE. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 8 ++++ arch/x86/kernel/i387.c | 12 ++++++ arch/x86/kernel/traps_32.c | 1 - arch/x86/kernel/traps_64.c | 4 -- arch/x86/kernel/xsave.c | 87 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 arch/x86/kernel/xsave.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f3312..d6ea91abaebc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o +obj-y += i387.o xsave.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..fabbcb7020fb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -712,6 +712,14 @@ void __cpuinit cpu_init(void) current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8efb82..e22a9a9dce8a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -61,6 +61,11 @@ void __init init_thread_xstate(void) return; } + if (cpu_has_xsave) { + xsave_cntxt_init(); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -83,6 +88,13 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + xsave_init(); + mxcsr_feature_mask_init(); /* clean state in init */ current_thread_info()->status = 0; diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 03df8e45e5a1..da5a5964fccb 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1228,7 +1228,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caaca7115..3580a7938a2e 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1172,10 +1172,6 @@ void __init trap_init(void) #ifdef CONFIG_IA32_EMULATION set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif - /* - * initialize the per thread extended state: - */ - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 000000000000..c68b7c4ca249 --- /dev/null +++ b/arch/x86/kernel/xsave.c @@ -0,0 +1,87 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha + */ +#include +#include +#include + +/* + * Supported feature mask by the CPU and the kernel. + */ +unsigned int pcntxt_hmask, pcntxt_lmask; + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct *init_xstate_buf; + +/* + * Enable the extended processor state save/restore feature + */ +void __cpuinit xsave_init(void) +{ + if (!cpu_has_xsave) + return; + + set_in_cr4(X86_CR4_OSXSAVE); + + /* + * Enable all the features that the HW is capable of + * and the Linux kernel is aware of. + * + * xsetbv(); + */ + asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), + "a" (pcntxt_lmask), "d" (pcntxt_hmask)); +} + +/* + * setup the xstate image representing the init state + */ +void setup_xstate_init(void) +{ + init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; +} + +/* + * Enable and initialize the xsave feature. + */ +void __init xsave_cntxt_init(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + pcntxt_lmask = eax; + pcntxt_hmask = edx; + + if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", + pcntxt_lmask); + BUG(); + } + + /* + * for now OS knows only about FP/SSE + */ + pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; + pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; + + xsave_init(); + + /* + * Recompute the context size for enabled features + */ + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + xstate_size = ebx; + + setup_xstate_init(); + + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + "cntxt size 0x%x\n", + (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); +} -- cgit v1.2.3 From b359e8a434cc3d09847010fc4aeccf48d69740e4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:20 -0700 Subject: x86, xsave: context switch support using xsave/xrstor Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch when available. Introduces TS_XSAVE flag, which determine the need to use xsave/xrstor instructions during context switch instead of the legacy fxsave/fxrstor instructions. Thread-synchronous status word is already in L1 cache during this code patch and thus minimizes the performance penality compared to (cpu_has_xsave) checks. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 5 ++++- arch/x86/kernel/i387.c | 5 ++++- arch/x86/kernel/traps_64.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fabbcb7020fb..6c2b9e756db2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -709,7 +709,10 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e22a9a9dce8a..b778e17e4b01 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -97,7 +97,10 @@ void __cpuinit fpu_init(void) mxcsr_feature_mask_init(); /* clean state in init */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3580a7938a2e..38eb76156a47 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + if (unlikely(restore_fpu_checking(me))) { stts(); force_sig(SIGSEGV, me); return; -- cgit v1.2.3 From 3c1c7f101426cb2ecc79d817a8a65928965fc860 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:21 -0700 Subject: x86, xsave: dynamically allocate sigframes fpstate instead of static allocation dynamically allocate fpstate on the stack, instead of static allocation in the current sigframe layout on the user stack. This will allow the fpstate structure to grow in the future, which includes extended state information supporting xsave/xrstor. signal handlers will be able to access the fpstate pointer from the sigcontext structure asusual, with no change. For the non RT sigframe's (which are supported only for 32bit apps), current static fpstate layout in the sigframe will be unused(so that we don't change the extramask[] offset in the sigframe and thus prevent breaking app's which modify extramask[]). Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 2 ++ arch/x86/kernel/sigframe.h | 14 ++++++++++++-- arch/x86/kernel/signal_32.c | 18 +++++++++++++----- arch/x86/kernel/signal_64.c | 2 +- arch/x86/kernel/xsave.c | 4 ++++ 5 files changed, 32 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b778e17e4b01..51fb288a2c97 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -24,6 +24,7 @@ # define save_i387_ia32 save_i387 # define restore_i387_ia32 restore_i387 # define _fpstate_ia32 _fpstate +# define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -36,6 +37,7 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; +unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2dc..6dd7e2b70a4b 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -3,9 +3,18 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate fpstate_unused; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe { @@ -15,13 +24,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; char retcode[8]; + /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; + /* fp state follows here */ }; #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd8933..19a7a5669b5b 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -306,7 +306,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + struct _fpstate **fpstate) { unsigned long sp; @@ -332,6 +333,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) sp = (unsigned long) ka->sa.sa_restorer; } + if (used_math()) { + sp = sp - sig_xstate_size; + *fpstate = (struct _fpstate *) sp; + } + sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -350,8 +356,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -366,7 +373,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -427,8 +434,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -453,7 +461,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ca316b5b742c..0deab8eff33f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,7 +281,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sig_xstate_size); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c68b7c4ca249..7ad169e33528 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -17,6 +17,10 @@ unsigned int pcntxt_hmask, pcntxt_lmask; */ struct xsave_struct *init_xstate_buf; +#ifdef CONFIG_X86_64 +unsigned int sig_xstate_size = sizeof(struct _fpstate); +#endif + /* * Enable the extended processor state save/restore feature */ -- cgit v1.2.3 From ab5137015fed9b948fe835a2d99a4cfbd50a0c40 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:22 -0700 Subject: x86, xsave: reorganization of signal save/restore fpstate code layout move 64bit routines that saves/restores fpstate in/from user stack from signal_64.c to xsave.c restore_i387_xstate() now handles the condition when user passes NULL fpstate. Other misc changes for prepartion of xsave/xrstor sigcontext support. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 44 +++++++++++++++++------- arch/x86/kernel/signal_32.c | 28 ++++----------- arch/x86/kernel/signal_64.c | 83 ++------------------------------------------- arch/x86/kernel/xsave.c | 79 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 114 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 51fb288a2c97..7daf3a011dd9 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -21,9 +21,10 @@ # include # include #else -# define save_i387_ia32 save_i387 -# define restore_i387_ia32 restore_i387 +# define save_i387_xstate_ia32 save_i387_xstate +# define restore_i387_xstate_ia32 restore_i387_xstate # define _fpstate_ia32 _fpstate +# define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct @@ -424,7 +425,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; - unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -438,8 +438,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; - unlazy_fpu(tsk); - convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -455,10 +453,16 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) return 1; } -int save_i387_ia32(struct _fpstate_ia32 __user *buf) +int save_i387_xstate_ia32(void __user *buf) { + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; + struct task_struct *tsk = current; + if (!used_math()) return 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) + return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -468,13 +472,15 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) ? -1 : 1; + NULL, fp) ? -1 : 1; } + unlazy_fpu(tsk); + if (cpu_has_fxsr) - return save_i387_fxsave(buf); + return save_i387_fxsave(fp); else - return save_i387_fsave(buf); + return save_i387_fsave(fp); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -502,14 +508,26 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } -int restore_i387_ia32(struct _fpstate_ia32 __user *buf) +int restore_i387_xstate_ia32(void __user *buf) { int err; struct task_struct *tsk = current; + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) + return -EACCES; + if (!used_math()) { err = init_fpu(tsk); if (err) @@ -518,13 +536,13 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) if (HAVE_HWFP) { if (cpu_has_fxsr) - err = restore_i387_fxsave(buf); + err = restore_i387_fxsave(fp); else - err = restore_i387_fsave(buf); + err = restore_i387_fsave(fp); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + NULL, fp) != 0; } set_used_math(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 19a7a5669b5b..690cc616ac07 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -159,28 +159,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -262,7 +248,7 @@ badframe: * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -289,7 +275,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387(fpstate); + tmp = save_i387_xstate(fpstate); if (tmp < 0) err = 1; else @@ -307,7 +293,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, */ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate **fpstate) + void **fpstate) { unsigned long sp; @@ -356,7 +342,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -434,7 +420,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 0deab8eff33f..ddf6123a55c8 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } -/* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. - */ - clear_fpu(tsk); - clear_used_math(); - } - return err; -} - /* * Do a signal return; undo the signal stack. */ @@ -160,25 +97,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user * buf; err |= __get_user(buf, &sc->fpstate); - - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -276,7 +199,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + void __user *fp = NULL; int err = 0; struct task_struct *me = current; @@ -288,7 +211,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) + if (save_i387_xstate(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7ad169e33528..608e72d7ca64 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -12,6 +12,85 @@ */ unsigned int pcntxt_hmask, pcntxt_lmask; +#ifdef CONFIG_X86_64 +/* + * Signal frame handlers. + */ + +int save_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) + return -EACCES; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.xstate->fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387_xstate: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *) + buf); + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + xstate_size)) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +int restore_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err; + + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) + return -EACCES; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; +} +#endif + /* * Represents init state for the supported extended state. */ -- cgit v1.2.3 From c37b5efea43f9e500363f9973dd00e3d2cdcc685 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:25 -0700 Subject: x86, xsave: save/restore the extended state context in sigframe On cpu's supporting xsave/xrstor, fpstate pointer in the sigcontext, will include the extended state information along with fpstate information. Presence of extended state information is indicated by the presence of FP_XSTATE_MAGIC1 at fpstate.sw_reserved.magic1 and FP_XSTATE_MAGIC2 at fpstate + (fpstate.sw_reserved.extended_size - FP_XSTATE_MAGIC2_SIZE). Extended feature bit mask that is saved in the memory layout is represented by the fpstate.sw_reserved.xstate_bv For RT signal frames, UC_FP_XSTATE in the uc_flags also indicate the presence of extended state information in the sigcontext's fpstate pointer. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 82 +++++++++++++++++++-- arch/x86/kernel/signal_32.c | 5 +- arch/x86/kernel/signal_64.c | 7 +- arch/x86/kernel/xsave.c | 172 +++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 247 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7daf3a011dd9..cbb9dc474a21 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -26,6 +26,7 @@ # define _fpstate_ia32 _fpstate # define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size +# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -447,12 +448,30 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, - sizeof(struct i387_fxsave_struct))) + if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) return -1; return 1; } +static int save_i387_xsave(void __user *buf) +{ + struct _fpstate_ia32 __user *fx = buf; + int err = 0; + + if (save_i387_fxsave(fx) < 0) + return -1; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, + sizeof(struct _fpx_sw_bytes)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_ia32_size + - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return -1; + + return 1; +} + int save_i387_xstate_ia32(void __user *buf) { struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; @@ -477,6 +496,8 @@ int save_i387_xstate_ia32(void __user *buf) unlazy_fpu(tsk); + if (cpu_has_xsave) + return save_i387_xsave(fp); if (cpu_has_fxsr) return save_i387_fxsave(fp); else @@ -491,14 +512,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, + unsigned int size) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + size); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -508,6 +530,51 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } +static int restore_i387_xsave(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + struct _fpstate_ia32 __user *fx_user = + ((struct _fpstate_ia32 __user *) buf); + struct i387_fxsave_struct __user *fx = + (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + unsigned int lmask, hmask; + int err; + + if (check_for_xstate(fx, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); + + xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory layout + * and enabled by the OS. + */ + lmask = ~(pcntxt_lmask & ~lmask); + hmask = ~(pcntxt_hmask & ~hmask); + xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + + return err; +fx_only: + /* + * Couldn't find the extended state information in the memory + * layout. Restore the FP/SSE and init the other extended state + * enabled by the OS. + */ + xsave_hdr->xstate_bv = XSTATE_FPSSE; + return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); +} + int restore_i387_xstate_ia32(void __user *buf) { int err; @@ -535,8 +602,11 @@ int restore_i387_xstate_ia32(void __user *buf) } if (HAVE_HWFP) { - if (cpu_has_fxsr) - err = restore_i387_fxsave(fp); + if (cpu_has_xsave) + err = restore_i387_xsave(buf); + else if (cpu_has_fxsr) + err = restore_i387_fxsave(fp, sizeof(struct + i387_fxsave_struct)); else err = restore_i387_fsave(fp); } else { diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 690cc616ac07..0f98d69fbdb0 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -441,7 +441,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ddf6123a55c8..2621b98f5bf6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -192,7 +192,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 16); + return (void __user *)round_down(sp - size, 64); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -226,7 +226,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 608e72d7ca64..dd66d0714c18 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -6,12 +6,68 @@ #include #include #include +#ifdef CONFIG_IA32_EMULATION +#include +#endif /* * Supported feature mask by the CPU and the kernel. */ unsigned int pcntxt_hmask, pcntxt_lmask; +struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +struct _fpx_sw_bytes fx_sw_reserved_ia32; +#endif + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw_user) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xsave_hdr_struct); + unsigned int magic2; + int err; + + err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], + sizeof(struct _fpx_sw_bytes)); + + if (err) + return err; + + /* + * First Magic check failed. + */ + if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) + return -1; + + /* + * Check for error scenarios. + */ + if (fx_sw_user->xstate_size < min_xstate_size || + fx_sw_user->xstate_size > xstate_size || + fx_sw_user->xstate_size > fx_sw_user->extended_size) + return -1; + + err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (err || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + #ifdef CONFIG_X86_64 /* * Signal frame handlers. @@ -28,15 +84,18 @@ int save_i387_xstate(void __user *buf) BUILD_BUG_ON(sizeof(struct user_i387_struct) != sizeof(tsk->thread.xstate->fxsave)); - if ((unsigned long)buf % 16) + if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); if (!used_math()) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = xsave_user(buf); + else + err = fxsave_user(buf); + if (err) return err; task_thread_info(tsk)->status &= ~TS_USEDFPU; @@ -46,23 +105,77 @@ int save_i387_xstate(void __user *buf) xstate_size)) return -1; } + + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct _fpstate __user *fx = buf; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_size + - FP_XSTATE_MAGIC2_SIZE)); + } + return 1; } +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE + * state. + */ +int restore_user_xstate(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + unsigned int lmask, hmask; + int err; + + if (((unsigned long)buf % 64) || + check_for_xstate(buf, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + /* + * restore the state passed by the user. + */ + err = xrestore_user(buf, lmask, hmask); + if (err) + return err; + + /* + * init the state skipped by the user. + */ + lmask = pcntxt_lmask & ~lmask; + hmask = pcntxt_hmask & ~hmask; + + xrstor_state(init_xstate_buf, lmask, hmask); + + return 0; + +fx_only: + /* + * couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, + pcntxt_hmask); + return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +} + /* * This restores directly out of user space. Exceptions are handled. */ int restore_i387_xstate(void __user *buf) { struct task_struct *tsk = current; - int err; + int err = 0; if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - + if (used_math()) + goto clear; return 0; } else if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) @@ -78,12 +191,17 @@ int restore_i387_xstate(void __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = restore_user_xstate(buf); + else + err = fxrstor_checking((__force struct i387_fxsave_struct *) + buf); if (unlikely(err)) { /* * Encountered an error while doing the restore from the * user buffer, clear the fpu state. */ +clear: clear_fpu(tsk); clear_used_math(); } @@ -91,6 +209,38 @@ int restore_i387_xstate(void __user *buf) } #endif +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void prepare_fx_sw_frame(void) +{ + int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + + FP_XSTATE_MAGIC2_SIZE; + + sig_xstate_size = sizeof(struct _fpstate) + size_extended; + +#ifdef CONFIG_IA32_EMULATION + sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; +#endif + + memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.xstate_bv = pcntxt_lmask | + (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_size = xstate_size; +#ifdef CONFIG_IA32_EMULATION + memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; +#endif +} + /* * Represents init state for the supported extended state. */ @@ -162,6 +312,8 @@ void __init xsave_cntxt_init(void) xstate_size = ebx; + prepare_fx_sw_frame(); + setup_xstate_init(); printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " -- cgit v1.2.3 From 42deec6f2c3688fdaf986225ac901b817cd91568 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:26 -0700 Subject: x86, xsave: update xsave header bits during ptrace fpregs set FP/SSE bits may be zero in the xsave header(representing the init state). Update these bits during the ptrace fpregs set operation, to indicate the non-init state. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cbb9dc474a21..e0ed59f5c19f 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -214,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + return ret; } @@ -414,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } -- cgit v1.2.3 From 6152e4b1c99a3689fc318d092cd144597f7dbd14 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:23:16 -0700 Subject: x86, xsave: keep the XSAVE feature mask as an u64 The XSAVE feature mask is a 64-bit number; keep it that way, in order to avoid the mistake done with rdmsr/wrmsr. Use the xsetbv() function provided in the previous patch. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 12 +++++------- arch/x86/kernel/xsave.c | 45 +++++++++++++++++---------------------------- 2 files changed, 22 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e0ed59f5c19f..45723f1fe198 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -552,18 +552,17 @@ static int restore_i387_xsave(void __user *buf) (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; struct xsave_hdr_struct *xsave_hdr = ¤t->thread.xstate->xsave.xsave_hdr; - unsigned int lmask, hmask; + u64 mask; int err; if (check_for_xstate(fx, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + xsave_hdr->xstate_bv &= pcntxt_mask; /* * These bits must be zero. */ @@ -573,9 +572,8 @@ static int restore_i387_xsave(void __user *buf) * Init the state that is not present in the memory layout * and enabled by the OS. */ - lmask = ~(pcntxt_lmask & ~lmask); - hmask = ~(pcntxt_hmask & ~hmask); - xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + mask = ~(pcntxt_mask & ~mask); + xsave_hdr->xstate_bv &= mask; return err; fx_only: diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index dd66d0714c18..7415f3e38a51 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -9,11 +9,12 @@ #ifdef CONFIG_IA32_EMULATION #include #endif +#include /* * Supported feature mask by the CPU and the kernel. */ -unsigned int pcntxt_hmask, pcntxt_lmask; +u64 pcntxt_mask; struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION @@ -127,30 +128,28 @@ int save_i387_xstate(void __user *buf) int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; - unsigned int lmask, hmask; + u64 mask; int err; if (((unsigned long)buf % 64) || check_for_xstate(buf, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; /* * restore the state passed by the user. */ - err = xrestore_user(buf, lmask, hmask); + err = xrestore_user(buf, mask); if (err) return err; /* * init the state skipped by the user. */ - lmask = pcntxt_lmask & ~lmask; - hmask = pcntxt_hmask & ~hmask; + mask = pcntxt_mask & ~mask; - xrstor_state(init_xstate_buf, lmask, hmask); + xrstor_state(init_xstate_buf, mask); return 0; @@ -160,8 +159,7 @@ fx_only: * memory layout. Restore just the FP/SSE and init all * the other extended state. */ - xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, - pcntxt_hmask); + xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); return fxrstor_checking((__force struct i387_fxsave_struct *)buf); } @@ -231,8 +229,7 @@ void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_lmask | - (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; #ifdef CONFIG_IA32_EMULATION memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, @@ -263,11 +260,8 @@ void __cpuinit xsave_init(void) /* * Enable all the features that the HW is capable of * and the Linux kernel is aware of. - * - * xsetbv(); */ - asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), - "a" (pcntxt_lmask), "d" (pcntxt_hmask)); + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } /* @@ -287,36 +281,31 @@ void __init xsave_cntxt_init(void) unsigned int eax, ebx, ecx, edx; cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); - pcntxt_lmask = eax; - pcntxt_hmask = edx; - - if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", - pcntxt_lmask); + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); BUG(); } /* * for now OS knows only about FP/SSE */ - pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; - pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; - + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); /* * Recompute the context size for enabled features */ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; prepare_fx_sw_frame(); setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " "cntxt size 0x%x\n", - (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); + pcntxt_mask, xstate_size); } -- cgit v1.2.3 From 7de08b4e1ed8d80e6086f71b7e99fc4b397aae39 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:51 -0300 Subject: x86: coding styles fixes to arch/x86/kernel/process_64.c Fix about 50 errors and many warnings without change process_64.o arch/x86/kernel/process_64.o: text data bss dec hex filename 5236 8 24 5268 1494 process_64.o.after 5236 8 24 5268 1494 process_64.o.before md5: 9c35e9debdea4e471288c6e8ca267a75 process_64.o.after 9c35e9debdea4e471288c6e8ca267a75 process_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 101 +++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 51 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7d9a16..4da8514dd25c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,11 +37,11 @@ #include #include #include +#include +#include -#include #include #include -#include #include #include #include @@ -88,7 +88,7 @@ void exit_idle(void) #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); -#include +#include /* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { @@ -152,7 +152,7 @@ void cpu_idle(void) } /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs * regs) +void __show_regs(struct pt_regs *regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -177,28 +177,28 @@ void __show_regs(struct pt_regs * regs) printk("RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); printk("R10: %016lx R11: %016lx R12: %016lx\n", - regs->r10, regs->r11, regs->r12); + regs->r10, regs->r11, regs->r12); printk("R13: %016lx R14: %016lx R15: %016lx\n", - regs->r13, regs->r14, regs->r15); + regs->r13, regs->r14, regs->r15); - asm("movl %%ds,%0" : "=r" (ds)); - asm("movl %%cs,%0" : "=r" (cs)); - asm("movl %%es,%0" : "=r" (es)); + asm("movl %%ds,%0" : "=r" (ds)); + asm("movl %%cs,%0" : "=r" (cs)); + asm("movl %%es,%0" : "=r" (es)); asm("movl %%fs,%0" : "=r" (fsindex)); asm("movl %%gs,%0" : "=r" (gsindex)); rdmsrl(MSR_FS_BASE, fs); - rdmsrl(MSR_GS_BASE, gs); - rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + rdmsrl(MSR_GS_BASE, gs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", - fs,fsindex,gs,gsindex,shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); + printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + fs, fsindex, gs, gsindex, shadowgs); + printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); @@ -314,10 +314,10 @@ void prepare_to_copy(struct task_struct *tsk) int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, - struct task_struct * p, struct pt_regs * regs) + struct task_struct *p, struct pt_regs *regs) { int err; - struct pt_regs * childregs; + struct pt_regs *childregs; struct task_struct *me = current; childregs = ((struct pt_regs *) @@ -362,10 +362,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (test_thread_flag(TIF_IA32)) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); - else -#endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); - if (err) + else +#endif + err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); + if (err) goto out; } err = 0; @@ -544,7 +544,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter>5) + if (next_p->fpu_counter > 5) prefetch(next->xstate); /* @@ -552,13 +552,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_sp0(tss, next); - /* + /* * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); + loadsegment(es, next->es); savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) @@ -584,7 +584,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_leave_lazy_cpu_mode(); - /* + /* * Switch FS and GS. * * Segment register != 0 always requires a reload. Also @@ -593,13 +593,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); - /* + /* * Check if the user used a selector != 0; if yes * clear 64bit base, since overloaded base is always * mapped to the Null selector */ if (fsindex) - prev->fs = 0; + prev->fs = 0; } /* when next process has a 64bit base use it */ if (next->fs) @@ -609,7 +609,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); if (gsindex) - prev->gs = 0; + prev->gs = 0; } if (next->gs) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); @@ -618,12 +618,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); - /* + /* * Switch the PDA and FPU contexts. */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + write_pda(pcurrent, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + @@ -664,7 +664,7 @@ long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -722,55 +722,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,ip; + u64 fp, ip; int count = 0; - if (!p || p == current || p->state==TASK_RUNNING) - return 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; stack = (unsigned long)task_stack_page(p); if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); - do { + do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) - return 0; + return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = *(u64 *)fp; + } while (count++ < 16); return 0; } long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; +{ + int ret = 0; int doit = task == current; int cpu; - switch (code) { + switch (code) { case ARCH_SET_GS: if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { + if (addr <= 0xffffffff) { + set_32bit_tls(task, GS_TLS, addr); + if (doit) { load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); + load_gs_index(GS_TLS_SEL); } - task->thread.gsindex = GS_TLS_SEL; + task->thread.gsindex = GS_TLS_SEL; task->thread.gs = 0; - } else { + } else { task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { load_gs_index(0); ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); - } + } } put_cpu(); break; @@ -824,8 +824,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gs; - } - else + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; -- cgit v1.2.3 From 8092c654de9a964c14d89da56834f73a80548a58 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:52 -0300 Subject: x86: add KERN_INFO to printks on process_64.c Fix many coding style warnings. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4da8514dd25c..3560d7f4d74e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,24 +161,24 @@ void __show_regs(struct pt_regs *regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, - regs->flags); - printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + regs->sp, regs->flags); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk("RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk("R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk("R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -196,24 +196,26 @@ void __show_regs(struct pt_regs *regs) cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); + printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + es, cr0); + printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) { - printk("CPU %d:", smp_processor_id()); + printk(KERN_INFO "CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -- cgit v1.2.3 From 08aadf069d0482ade033badefa8f03eb2fcddd9c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:53 -0300 Subject: x86: coding style fixes to arch/x86/kernel/crash_dump_64.c Fix conding style without change crash_dump_64.o arch/x86/kernel/crash_dump_64.o text data bss dec hex filename 129 0 0 129 81 crash_dump_64.o.after 129 0 0 129 81 crash_dump_64.o.before md5: 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.after 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_64.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a46..d3e524c84527 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -7,9 +7,8 @@ #include #include - -#include -#include +#include +#include /** * copy_oldmem_page - copy one page from "oldmem" @@ -25,7 +24,7 @@ * in the current kernel. We stitch up a pte, similar to kmap_atomic. */ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) + size_t csize, unsigned long offset, int userbuf) { void *vaddr; -- cgit v1.2.3 From caa007dd3687d38a0252484d9d0a8f9d929ba932 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:54 -0300 Subject: x86: coding style fixes to arch/x86/kernel/signal_64.c Fix all errors and many warnings reported by checkpatch.pl without change signal_64.o arch/x86/kernel/signal_64.o text data bss dec hex filename 5143 0 8 5151 141f signal_64.o.after 5143 0 8 5151 141f signal_64.o.before md5: e68718092b3641cb27e79e55ce57e3ad signal_64.o.after e68718092b3641cb27e79e55ce57e3ad signal_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/sigframe.h | 5 ++++ arch/x86/kernel/signal_64.c | 62 ++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2dc..8b4956e800ac 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -24,4 +24,9 @@ struct rt_sigframe { struct ucontext uc; struct siginfo info; }; + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs *regs); #endif diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd651..87a9c2f28d99 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -19,9 +19,10 @@ #include #include #include +#include + #include #include -#include #include #include #include @@ -41,11 +42,6 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs); - asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) @@ -119,7 +115,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) +#define COPY(x) (err |= __get_user(regs->x, &sc->x)) COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); @@ -149,7 +145,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user * buf; + struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); if (buf) { @@ -189,7 +185,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -199,16 +195,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs,frame,"sigreturn"); + signal_fault(regs, frame, "sigreturn"); return 0; -} +} /* * Set up a signal frame. */ static inline int -setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, struct task_struct *me) { int err = 0; @@ -264,35 +261,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + struct _fpstate __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sizeof(struct _fpstate)); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) - err |= -1; + if (save_i387(fp) < 0) + err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) goto give_sigsegv; } - + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); @@ -302,9 +299,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); - if (sizeof(*set) == 16) { + if (sizeof(*set) == 16) { __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); - __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); } else err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -315,7 +312,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + goto give_sigsegv; } if (err) @@ -323,7 +320,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->di = sig; - /* In case the signal handler was declared without prototypes */ + /* In case the signal handler was declared without prototypes */ regs->ax = 0; /* This also works for non SA_SIGINFO handlers because they expect the @@ -376,7 +373,7 @@ static long current_syscall_ret(struct pt_regs *regs) /* * OK, we're invoking a handler - */ + */ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -420,7 +417,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else ret = ia32_setup_frame(sig, ka, oldset, regs); - } else + } else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); @@ -448,9 +445,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); + sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } @@ -552,14 +549,15 @@ void do_notify_resume(struct pt_regs *regs, void *unused, } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -{ - struct task_struct *me = current; +{ + struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); + me->comm, me->pid, where, frame, regs->ip, + regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); printk("\n"); } - force_sig(SIGSEGV, me); -} + force_sig(SIGSEGV, me); +} -- cgit v1.2.3 From 4df9e510a9fda29aca71d8acac853b98aa6884d1 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:55 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix all errors and many warnings reported by checkpath.pl. Except the change of include to the traps.o before and after changes are the same. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 59 +++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420c..fe36d96ba70b 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #if defined(CONFIG_EDAC) #include @@ -45,9 +47,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", address, reliable ? + "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / + EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, } /* - * x86-64 can have up to three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) :); + asm("movq %%rbp, %0" : "=r" (bp) : ); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -357,11 +358,13 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irqstack_end = + (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = + (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - // debugging aid: "show_stack(NULL, NULL);" prints the - // back trace for this cpu. + /* debugging aid: "show_stack(NULL, NULL);" prints the + back trace for this cpu. */ if (sp == NULL) { if (task) @@ -404,7 +407,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movq %%rbp, %0" : "=r" (bp):); + asm("movq %%rbp, %0" : "=r" (bp) : ); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -414,7 +417,6 @@ void dump_stack(void) init_utsname()->version); show_trace(NULL, NULL, &stack, bp); } - EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) @@ -493,7 +495,7 @@ unsigned __kprobes long oops_begin(void) raw_local_irq_save(flags); cpu = smp_processor_id(); if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) + if (cpu == die_owner) /* nested oops. should stop eventually */; else __raw_spin_lock(&die_lock); @@ -638,7 +640,7 @@ kernel_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -648,7 +650,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -683,7 +685,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } -asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; @@ -778,9 +780,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) } static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) return; printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); @@ -882,7 +885,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) else if (user_mode(eregs)) regs = task_pt_regs(current); /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -891,7 +894,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void __kprobes do_debug(struct pt_regs * regs, +asmlinkage void __kprobes do_debug(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk = current; @@ -1035,7 +1038,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) asmlinkage void bad_intr(void) { - printk("bad interrupt"); + printk("bad interrupt"); } asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) @@ -1047,7 +1050,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) conditional_sti(regs); if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) + kernel_math_error(regs, "kernel simd math error", 19)) return; /* @@ -1092,7 +1095,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) { } @@ -1142,8 +1145,10 @@ void __init trap_init(void) set_intr_gate(0, ÷_error); set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); - set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ - set_system_gate(4, &overflow); /* int4 can be called from all */ + /* int3 can be called from all */ + set_system_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); -- cgit v1.2.3 From e9c8abb66cc37801bdb5d4360bb78d180c3bbb73 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:56 -0300 Subject: x86: coding style fixes to arch/x86/kernel/sys_x86_64.c Fix all errors and many warnings reported by checkpatch.pl without change sys_x86_64.o arch/x86/kernel/sys_x86_64.o: text data bss dec hex filename 1567 0 0 1567 61f sys_x86_64.o.after 1567 0 0 1567 61f sys_x86_64.o.before md5: de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.after de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/sys_x86_64.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef33817..56eb8f916e9f 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -13,15 +13,16 @@ #include #include #include +#include -#include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off) { long error; - struct file * file; + struct file *file; error = -EINVAL; if (off & ~PAGE_MASK) @@ -56,9 +57,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, unmapped base down for this case. This can give conflicts with the heap, but we assume that glibc malloc knows how to fall back to mmap. Give it 1GB - of playground for now. -AK */ - *begin = 0x40000000; - *end = 0x80000000; + of playground for now. -AK */ + *begin = 0x40000000; + *end = 0x80000000; if (current->flags & PF_RANDOMIZE) { new_begin = randomize_range(*begin, *begin + 0x02000000, 0); if (new_begin) @@ -66,9 +67,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, } } else { *begin = TASK_UNMAPPED_BASE; - *end = TASK_SIZE; + *end = TASK_SIZE; } -} +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -78,11 +79,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; unsigned long begin, end; - + if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(flags, &begin, &end); if (len > end) return -ENOMEM; @@ -96,12 +97,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) && len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; + mm->cached_hole_size = 0; mm->free_area_cache = begin; } addr = mm->free_area_cache; - if (addr < begin) - addr = begin; + if (addr < begin) + addr = begin; start_addr = addr; full_search: @@ -127,7 +128,7 @@ full_search: return addr; } if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; + mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; } @@ -177,7 +178,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr-len); if (!vma || addr <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return mm->free_area_cache = addr-len; } if (mm->mmap_base < len) @@ -194,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (!vma || addr+len <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); + return mm->free_area_cache = addr; /* remember the largest hole we saw so far */ if (addr + mm->cached_hole_size < vma->vm_start) @@ -224,13 +225,13 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user * name) +asmlinkage long sys_uname(struct new_utsname __user *name) { int err; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) - err |= copy_to_user(&name->machine, "i686", 5); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } -- cgit v1.2.3 From a677f58a8c8c541bf7d02c658545084040f3708d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 00:37:10 -0700 Subject: x86: print per_cpu data address to make sure per_cpu data on correct node. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c006..61f3966632a8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -180,9 +180,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); -- cgit v1.2.3 From a0ac87d61ba20932e54f08464d3f3439d78fa878 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:04 +0200 Subject: x86: AMD microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 07e52beacb44..6789530ef33b 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -47,9 +47,9 @@ MODULE_LICENSE("GPL v2"); #define UCODE_UCODE_TYPE 0x00000001 #define UCODE_MAX_SIZE (2048) -#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DEFAULT_UCODE_DATASIZE (896) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define DWSIZE (sizeof(u32)) /* For now we support a fixed ucode total size only */ #define get_totalsize(mc) \ -- cgit v1.2.3 From f516526febb75500f280def2108688d388df4e1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:05 +0200 Subject: x86: Intel microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 6da4a85ff465..a61986e8b691 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/* #define DEBUG */ /* pr_debug */ #include #include #include @@ -99,11 +99,11 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.totalsize ? \ -- cgit v1.2.3 From 831f9bd3151ebd22959a0cb2a20b44a06b26d4ed Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:06 +0200 Subject: x86: moved function declarations out from AMD microcode patch loader to heade file Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 6789530ef33b..2b98e6ab1bf9 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -56,9 +56,6 @@ MODULE_LICENSE("GPL v2"); ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + MC_HEADER_SIZE) -extern int microcode_init(void *opaque, struct module *module); -extern void microcode_exit(void); - /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -- cgit v1.2.3 From daa9c0fee1cbe1d49fbe29af3d4bb16312043b1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:07 +0200 Subject: x86: minor pointer type cast in AMD microcode patch loader Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 2b98e6ab1bf9..33b2a217a8c5 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -394,7 +394,8 @@ static int cpu_request_microcode_amd(int cpu) return error; } - buf_pos = buf = firmware->data; + buf_pos = (unsigned int *)firmware->data; + buf = (void *)firmware->data; size = firmware->size; if (buf_pos[0] != UCODE_MAGIC) { -- cgit v1.2.3 From 7ab6af7ab69df8c9c5fbc380004fb81187742096 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jul 2008 17:36:48 -0700 Subject: x86_32: use apic_ops at print_local_APIC() Use apic_icr_read at print_local_APIC() in io_apic_32.c Signed-off-by: Hiroshi Shimamoto Cc: Suresh Siddha Cc: Yinghai Lu Signed-off-by: Ingo Molnar Cc: Suresh Siddha Cc: Yinghai Lu --- arch/x86/kernel/io_apic_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f3..39e063a9a28a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1486,6 +1486,7 @@ static void print_APIC_bitfield(int base) void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1536,10 +1537,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From e0d22d03c06c4e2c194d7010bc1e4a972199f156 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 31 Jul 2008 23:43:44 +0200 Subject: x86: fdiv bug detection fix The fdiv detection code writes s32 integer into the boot_cpu_data.fdiv_bug. However, the boot_cpu_data.fdiv_bug is only char (s8) field so the detection overwrites already set fields for other bugs, e.g. the f00f bug field. Use local s32 variable to receive result. This is a partial fix to Bugzilla #9928 - fixes wrong information about the f00f bug (tested) and probably for coma bug (I have no cpu to test this). Signed-off-by: Krzysztof Helt Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9b58a806e85..c8e315f1aa83 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; */ static void __init check_fpu(void) { + s32 fdiv_bug; + if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); @@ -74,8 +76,10 @@ static void __init check_fpu(void) "fistpl %0\n\t" "fwait\n\t" "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) + : "=m" (*&fdiv_bug) : "m" (*&x), "m" (*&y)); + + boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) printk("Hmm, FPU with FDIV bug.\n"); } -- cgit v1.2.3 From ec983f7060cd73e14cdd3edd910339127a8a4e96 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 30 Jul 2008 12:05:03 -0400 Subject: [CPUFREQ] Remove EXPERIMENTAL annotation from VIA C7 powersaver kconfig. This has been pretty solid, and doesn't see much change at all. Noticed by Harald Welte. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596d..efae3b22a0ff 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -235,9 +235,9 @@ config X86_LONGHAUL If in doubt, say N. config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + tristate "VIA C7 Enhanced PowerSaver" select CPU_FREQ_TABLE - depends on X86_32 && EXPERIMENTAL + depends on X86_32 help This adds the CPUFreq driver for VIA C7 processors. -- cgit v1.2.3 From 460f5ef2835dbc33825f611f408eb09c29be4b85 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 30 Jul 2008 13:01:42 -0400 Subject: [CPUFREQ] Fix warning in elanfreq arch/x86/kernel/cpu/cpufreq/elanfreq.c:47:26: warning: symbol 'elan_multiplier' was not declared. Should it be static? Yes, yes it should. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f563..e4a4bf870e94 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -44,7 +44,7 @@ struct s_elan_multiplier { * It is important that the frequencies * are listed in ascending order here! */ -struct s_elan_multiplier elan_multiplier[] = { +static struct s_elan_multiplier elan_multiplier[] = { {1000, 0x02, 0x18}, {2000, 0x02, 0x10}, {4000, 0x02, 0x08}, -- cgit v1.2.3 From 23431b495fcbbde6d09aba30019b6a224d543cf9 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 31 Jul 2008 12:39:05 -0500 Subject: [CPUFREQ][1/2] whitespace fix for powernow-k8 Trivial whitespace fix for powernow-k8. Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c45ca6d4dce1..84bb395038d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid) return 800 + (fid * 100); } - /* Return a frequency in KHz, given an input fid */ static u32 find_khz_freq_from_fid(u32 fid) { @@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p return data[pstate].frequency; } - /* Return the vco fid for an input fid * * Each "low" fid has corresponding "high" fid, and you can get to "low" fids @@ -166,7 +164,6 @@ static void fidvid_msr_init(void) wrmsr(MSR_FIDVID_CTL, lo, hi); } - /* write the new fid value along with the other control fields to the msr */ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { -- cgit v1.2.3 From 34ae7f35a21694aa5cb8829dc5142c39d73d6ba0 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 31 Jul 2008 12:39:12 -0500 Subject: [CPUFREQ][2/2] preregister support for powernow-k8 This patch provides support for the _PSD ACPI object in the Powernow-k8 driver. Although it looks like an invasive patch, most of it is simply the consequence of turning the static acpi_performance_data structure into a pointer. AMD has tested it on several machines over the past few days without issue. [trivial checkpatch warnings fixed up by davej] [X86_POWERNOW_K8_ACPI=n buildfix from Randy Dunlap] Signed-off-by: Mark Langsdorf Tested-by: Frank Arnold Signed-off-by: Randy Dunlap Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 109 ++++++++++++++++++++---------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 3 +- 2 files changed, 75 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 84bb395038d8..4e7271999a74 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -737,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data) #ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE)) return; - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; + data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK; +} + + +static struct acpi_processor_performance *acpi_perf_data; +static int preregister_valid; + +static int powernow_k8_cpu_preinit_acpi(void) +{ + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) + return -ENODEV; + + if (acpi_processor_preregister_performance(acpi_perf_data)) + return -ENODEV; + else + preregister_valid = 1; + return 0; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val; + int cpu = 0; - if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); + if (acpi_processor_register_performance(data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); return -EIO; } /* verify the data contained in the ACPI structures */ - if (data->acpi_data.state_count <= 1) { + if (data->acpi_data->state_count <= 1) { dprintk("No ACPI P-States\n"); goto err_out; } - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); + data->acpi_data->control_register.space_id, + data->acpi_data->status_register.space_id); goto err_out; } /* fill in data->powernow_table */ powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data.state_count + 1)), GFP_KERNEL); + * (data->acpi_data->state_count + 1)), GFP_KERNEL); if (!powernow_table) { dprintk("powernow_table memory alloc failure\n"); goto err_out; @@ -787,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if (ret_val) goto err_out_mem; - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; + powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data->state_count].index = 0; data->powernow_table = powernow_table; /* fill in data */ - data->numps = data->acpi_data.state_count; + data->numps = data->acpi_data->state_count; if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -800,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + /* determine affinity, from ACPI if available */ + if (preregister_valid) { + if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) || + (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY)) + data->starting_core_affinity = data->acpi_data->shared_cpu_map; + else + data->starting_core_affinity = cpumask_of_cpu(data->cpu); + } else { + /* best guess from family if not */ + if (cpu_family == CPU_HW_PSTATE) + data->starting_core_affinity = cpumask_of_cpu(data->cpu); + else + data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu); + } + return 0; err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(data->acpi_data, data->cpu); /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data.state_count = 0; + data->acpi_data->state_count = 0; return -ENODEV; } @@ -821,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; - for (i = 0; i < data->acpi_data.state_count; i++) { + for (i = 0; i < data->acpi_data->state_count; i++) { u32 index; - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + index = data->acpi_data->states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); @@ -840,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf powernow_table[i].index = index; - powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; + powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; } return 0; } @@ -849,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf { int i; int cntlofreq = 0; - for (i = 0; i < data->acpi_data.state_count; i++) { + for (i = 0; i < data->acpi_data->state_count; i++) { u32 fid; u32 vid; if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + fid = data->acpi_data->states[i].status & EXT_FID_MASK; + vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK; } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + fid = data->acpi_data->states[i].control & FID_MASK; + vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK; } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); @@ -899,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf cntlofreq = i; } - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } @@ -912,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { - if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + if (data->acpi_data->state_count) + acpi_processor_unregister_performance(data->acpi_data, data->cpu); } #else +static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } @@ -1101,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; - cpumask_t oldmask; + cpumask_t oldmask = CPU_MASK_ALL; int rc; if (!cpu_online(pol->cpu)) @@ -1174,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* run on any CPU again */ set_cpus_allowed_ptr(current, &oldmask); - if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); - else - pol->cpus = per_cpu(cpu_core_map, pol->cpu); + pol->cpus = data->starting_core_affinity; data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1300,6 +1332,7 @@ static int __cpuinit powernowk8_init(void) } if (supported_cpus == num_online_cpus()) { + powernow_k8_cpu_preinit_acpi(); printk(KERN_INFO PFX "Found %d %s " "processors (%d cpu cores) (" VERSION ")\n", num_online_nodes(), @@ -1316,6 +1349,10 @@ static void __exit powernowk8_exit(void) dprintk("exit\n"); cpufreq_unregister_driver(&cpufreq_amd64_driver); + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI + free_percpu(acpi_perf_data); +#endif } MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d96..a62612cd4be8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -33,12 +33,13 @@ struct powernow_k8_data { #ifdef CONFIG_X86_POWERNOW_K8_ACPI /* the acpi table needs to be kept. it's only available if ACPI was * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance acpi_data; + struct acpi_processor_performance *acpi_data; #endif /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ cpumask_t *available_cores; + cpumask_t starting_core_affinity; }; -- cgit v1.2.3 From 31343d8a5079cda57ffd539fcf4f00cea344fe98 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 8 Aug 2008 12:15:57 -0700 Subject: x86: Fix broken VMI in 2.6.27-rc.. The lowmem mapping table created by VMI need not depend on max_low_pfn at all. Instead we now create an extra large mapping which covers all possible lowmem instead of the physical ram that is actually available. This allows the vmi initialization to be done before max_low_pfn could be computed. We also move the vmi_init code very early in the boot process so that nobody accidentally breaks the fixmap dependancy. Signed-off-by: Alok N Kataria Acked-by: Zachary Amsden Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 16 ++++++++-------- arch/x86/kernel/vmi_32.c | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2d888586385d..6e5823b9e765 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -604,6 +604,14 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); early_ioremap_init(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); screen_info = boot_params.screen_info; edid_info = boot_params.edid_info; @@ -817,14 +825,6 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be after max_low_pfn is determined, and before kernel - * pagetables are setup. - */ - vmi_init(); -#endif - paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); paravirt_pagetable_setup_done(swapper_pg_dir); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922d..6ca515d6db54 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -37,6 +37,7 @@ #include #include #include +#include /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -683,7 +684,7 @@ void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ if (vmi_ops.set_linear_mapping) - vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); + vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); } /* -- cgit v1.2.3 From 90936cfe6c8f7e90a6f8b0c5cb44d3a012dfd313 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..46af71676738 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3 From 85a14437ed24244c78f9a70d58b8299753b03c92 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:12:37 +0200 Subject: x86: fix MP_processor_info section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1fe7): Section mismatch in reference from the function MP_processor_info() to the variable .init.data:x86_quirks The function __cpuinit MP_processor_info() references a variable __initdata x86_quirks. If x86_quirks is only used by MP_processor_info then annotate x86_quirks with a matching annotation. MP_processor_info uses x86_quirks which is __init and is used only from smp_read_mpc and construct_default_ISA_mptable which are __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed8..78509ee8cc74 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; -- cgit v1.2.3 From bafc1dae8215c862c2e6ae913ddadc20581e59b9 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:11:13 +0200 Subject: x86: mmconf: fix section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1591): Section mismatch in reference from the function init_amd() to the function .init.text:check_enable_amd_mmconf_dmi() The function __cpuinit init_amd() references a function __init check_enable_amd_mmconf_dmi(). If check_enable_amd_mmconf_dmi is only used by init_amd then annotate check_enable_amd_mmconf_dmi with a matching annotation. check_enable_amd_mmconf_dmi is only called from init_amd which is __cpuinit Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b366..efc2f361fe85 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } -- cgit v1.2.3 From d406d21d90dce2e66c7eb4a44605aac947fe55fb Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:09:38 +0200 Subject: x86: mpparse.c: fix section mismatch warning WARNING: vmlinux.o(.text+0x118f7): Section mismatch in reference from the function construct_ioapic_table() to the function .init.text:MP_bus_info() The function construct_ioapic_table() references the function __init MP_bus_info(). This is often because construct_ioapic_table lacks a __init annotation or the annotation of MP_bus_info is wrong. construct_ioapic_table is called only from construct_default_ISA_mptable which is __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 78509ee8cc74..2c1963b39621 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) -- cgit v1.2.3 From d388e5fdc461344d04307a3fa83862b9ed429647 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 Aug 2008 15:09:02 -0700 Subject: x86: Restore proper vector locking during cpu hotplug Having cpu_online_map change during assign_irq_vector can result in some really nasty and weird things happening. The one that bit me last time was accessing non existent per cpu memory for non existent cpus. This locking was removed in a sloppy x86_64 and x86_32 merge patch. Guys can we please try and avoid subtly breaking x86 when we are merging files together? Signed-off-by: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_apic_32.c | 6 +----- arch/x86/kernel/io_apic_64.c | 25 +++++++++++++++---------- arch/x86/kernel/smpboot.c | 12 +++++++++--- 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c5..09cddb57bec4 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -57,7 +57,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq) return vector; } -void setup_vector_irq(int cpu) -{ -} - static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d1707..61a83b70c18f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -101,7 +101,7 @@ int timer_through_8259 __initdata; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + static int __assign_irq_vector(int irq, cpumask_t mask) { /* @@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -static void __setup_vector_irq(int cpu) +void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu) } } -void setup_vector_irq(int cpu) -{ - spin_lock(&vector_lock); - __setup_vector_irq(smp_processor_id()); - spin_unlock(&vector_lock); -} - - static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4f..da10f07fc59c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused) * for which cpus receive the IPI. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). + * + * We need to hold vector_lock so there the set of online cpus + * does not change while we are assigning vectors to cpus. Holding + * this lock ensures we don't half assign or remove an irq from a cpu. */ ipi_call_lock_irq(); -#ifdef CONFIG_X86_IO_APIC - setup_vector_irq(smp_processor_id()); -#endif + lock_vector_lock(); + __setup_vector_irq(smp_processor_id()); cpu_set(smp_processor_id(), cpu_online_map); + unlock_vector_lock(); ipi_call_unlock_irq(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -1336,7 +1340,9 @@ int __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ + lock_vector_lock(); remove_cpu_from_maps(cpu); + unlock_vector_lock(); fixup_irqs(cpu_online_map); return 0; } -- cgit v1.2.3 From 4c942654a4514d7d0a9b592a7d1b198a212e8a03 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 20:57:45 +0800 Subject: arch/x86/kernel/acpi/boot.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/acpi/boot.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d71290..d8d118935b08 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 #include -#include #else /* X86 */ -- cgit v1.2.3 From 8aeb4022633f7d0eca5e13a9622bd73df92bbf2a Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 21:09:22 +0800 Subject: arch/x86/kernel/cpuid.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/cpuid.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpuid.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3be31c..3fa4e926b510 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 48 ------------------------------------------- arch/x86/kernel/irqinit_32.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f93c18f5b79d..9e341c9d9414 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1354,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee1..9200a1e2752d 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ -- cgit v1.2.3 From eeb0d7d113895556db473ff1e638803d7d49bff9 Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 17:44:57 +0200 Subject: x86, debug: tone down arch/x86/kernel/mpparse.c debugging printk commit 11a62a056093a7f25f1595fbd8bd5f93559572b6 turns some formerly nopped debugging printks in arch/x86/kernel/mppparse.c into regular ones. The one at the top of smp_scan_config() in particular also prints on !CONFIG_SMP/CONFIG_X86_LOCAL_APIC kernels and UP machines without anything resembling MP tables which makes their lowly UP owners wonder... Turn the former Dprintk()s into apic_printk()s instead meaning that their printing is dependent on passing the apic=verbose (or =debug) command line param. On 32-bit, "apic" is a __setup() param which isn't early enough for this code and therefore needs a followup changing it into an early_param(). On 64-bit, it already is. Signed-off-by: Rene Herman Cc: Andrew Morton Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed8..678090508a62 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) if (x86_quirks->mpc_oem_bus_info) x86_quirks->mpc_oem_bus_info(m, str); else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, @@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, @@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { -- cgit v1.2.3 From fb6bef8002d54a9c3062abc281893ec7c896d3ce Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 17:45:53 +0200 Subject: x86: make "apic" an early_param() on 32-bit On 32-bit, "apic" is a __setup() param meaning it is parsed rather late in the game. Make it an early_param() for apic_printk() use by arch/x86/kernel/mpparse.c. On 64-bit, it already is an early_param(). Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c898358371..f432d4833a12 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1726,9 +1726,9 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", str) == 0) apic_verbosity = APIC_VERBOSE; - return 1; + return 0; } -__setup("apic=", apic_set_verbosity); +early_param("apic", apic_set_verbosity); static int __init lapic_insert_resource(void) { -- cgit v1.2.3 From 9b0094f7f289d752868aae1a3984517d712cbb22 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Aug 2008 15:14:55 -0700 Subject: x86, pci-calgary: fix function declaration Fix function declaration: linux-next-20080807/arch/x86/kernel/pci-calgary_64.c:1353:36: warning: non-ANSI function declaration of function 'get_tce_space_from_tar' Signed-off-by: Randy Dunlap Acked-by: Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b67a4b1d4eae..02d19328525d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) * Function for kdump case. Get the tce tables from first kernel * by reading the contents of the base adress register of calgary iommu */ -static void get_tce_space_from_tar() +static void get_tce_space_from_tar(void) { int bus; void __iomem *target; -- cgit v1.2.3 From 48d97cb65e62a5f1122ac2cf1149800d4f4693e8 Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 19:20:17 +0200 Subject: x86: make "apic" an early_param() on 32-bit, NULL check Cyrill Gorcunov observed: > you turned it into early_param so now it's NULL injecting vulnerabled. > Could you please add checking for NULL str param? fix that. Also, change the name of 'str' into 'arg', to make it more apparent that this is an optional argument that can be NULL, not a string parameter that is empty when unset. Reported-by: Cyrill Gorcunov Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f432d4833a12..039a8d4aaf62 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1720,12 +1720,16 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init apic_set_verbosity(char *str) +static int __init apic_set_verbosity(char *arg) { - if (strcmp("debug", str) == 0) + if (!arg) + return -EINVAL; + + if (strcmp(arg, "debug") == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) + else if (strcmp(arg, "verbose") == 0) apic_verbosity = APIC_VERBOSE; + return 0; } early_param("apic", apic_set_verbosity); -- cgit v1.2.3 From 49800efcb17afdf973f33e8aa8807b7f83993cc6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6f9b8924bdc0..8396fd7628a9 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -493,17 +493,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v1.2.3 From b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8396fd7628a9..cc6efe86249d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,19 +606,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v1.2.3 From b74548e76a0eab1f29546e7c5a589429c069a680 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 11 Aug 2008 13:36:04 -0700 Subject: x86: fix 2.6.27rc1 cannot boot more than 8CPUs Jeff Chua reported that booting a !bigsmp kernel on a 16-way box hangs silently. this is a long-standing issue, smp start AP cpu could check the apic id >=8 etc before trying to start it. achieve this by moving the def_to_bigsmp check later and skip the apicid id > 8 [ mingo@elte.hu: clean up the message that is printed. ] Reported-by: "Jeff Chua" Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar arch/x86/kernel/setup.c | 6 ------ arch/x86/kernel/smpboot.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) --- arch/x86/kernel/setup.c | 6 ------ arch/x86/kernel/smpboot.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6e5823b9e765..68b48e3fbcbd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -861,12 +861,6 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) - if (def_to_bigsmp) - printk(KERN_WARNING "More than 8 CPUs detected and " - "CONFIG_X86_PC cannot handle it.\nUse " - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); -#endif kvm_guest_init(); e820_reserve_resources(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index da10f07fc59c..91055d7fc1b0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -994,7 +994,17 @@ int __cpuinit native_cpu_up(unsigned int cpu) flush_tlb_all(); low_mappings = 1; +#ifdef CONFIG_X86_PC + if (def_to_bigsmp && apicid > 8) { + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + err = -1; + } else + err = do_boot_cpu(apicid, cpu); +#else err = do_boot_cpu(apicid, cpu); +#endif zap_low_mappings(); low_mappings = 0; -- cgit v1.2.3 From c9d08f0860d47ed6a3fe91d0f19335086179be7b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Aug 2008 23:23:03 +0200 Subject: x86: fix 2 section mismatch warnings - map_high() WARNING: vmlinux.o(.text+0x14cf8): Section mismatch in reference from the function map_high() to the function .init.text:init_extra_mapping_uc() The function map_high() references the function __init init_extra_mapping_uc(). This is often because map_high lacks a __init annotation or the annotation of init_extra_mapping_uc is wrong. WARNING: vmlinux.o(.text+0x14d05): Section mismatch in reference from the function map_high() to the function .init.text:init_extra_mapping_wb() The function map_high() references the function __init init_extra_mapping_wb(). This is often because map_high lacks a __init annotation or the annotation of init_extra_mapping_wb is wrong. map_high is called only from __init functions (map_*_high) and calls 2 __init_functions (init_extra_mapping_*) Signed-off-by: Marcin Slusarz Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2cfcbded888a..2d7e307c7779 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -222,7 +222,7 @@ static __init void map_low_mmrs(void) enum map_type {map_wb, map_uc}; -static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) { unsigned long bytes, paddr; -- cgit v1.2.3 From 6b3560229d3b6be7443fa9f9c6502e660bcfef5f Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Aug 2008 23:23:05 +0200 Subject: x86: fix 2 section mismatch warnings - find_and_reserve_crashkernel WARNING: vmlinux.o(.text+0xcd1f): Section mismatch in reference from the function find_and_reserve_crashkernel() to the function .init.text:find_e820_area() The function find_and_reserve_crashkernel() references the function __init find_e820_area(). This is often because find_and_reserve_crashkernel lacks a __init annotation or the annotation of find_e820_area is wrong. WARNING: vmlinux.o(.text+0xcd38): Section mismatch in reference from the function find_and_reserve_crashkernel() to the function .init.text:reserve_bootmem_generic() The function find_and_reserve_crashkernel() references the function __init reserve_bootmem_generic(). This is often because find_and_reserve_crashkernel lacks a __init annotation or the annotation of reserve_bootmem_generic is wrong. find_and_reserve_crashkernel is called from __init function (reserve_crashkernel) and calls 2 __init functions (find_e820_area, reserve_bootmem_generic), so mark it __init Signed-off-by: Marcin Slusarz Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3fbcbd..a4656adab53b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void) * @size: Size of the crashkernel memory to reserve. * Returns the base address on success, and -1ULL on failure. */ -unsigned long long find_and_reserve_crashkernel(unsigned long long size) +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) { const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long start = 0LL; -- cgit v1.2.3 From a726c6009e6eba4acfccf8b683854866eeabb184 Mon Sep 17 00:00:00 2001 From: John Keller Date: Tue, 29 Jul 2008 14:34:16 -0500 Subject: x86: allow MMCONFIG above 4GB on x86_64 SGI UV will have MMCFG base addresses that are greater than 4GB (32 bits). v2: Use CONFIG_RESOURCES_64BIT instead of CONFIG_X86_64. v3: Create a flag, that is set by platform specific code, to disable the > 4GB check. Signed-off-by: John Keller Cc: jpk@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d71290..bfd10fd211cd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +{ + if (!strcmp(mcfg->header.oem_id, "SGI")) + acpi_mcfg_64bit_base_addr = TRUE; + + return 0; +} + int __init acpi_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; @@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + + acpi_mcfg_oem_check(mcfg); + for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && + !acpi_mcfg_64bit_base_addr) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); kfree(pci_mmcfg_config); -- cgit v1.2.3 From 34a1b9fc4995102ecbb3a980b348aebf168d8196 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 12 Aug 2008 13:25:44 +0100 Subject: Fix date output in x86 microcode driver. The microcode stores its date in a uint32_t in some weird order approximating pdp-endian. Rather than printing it like that, print it properly in ISO standard form. Signed-off-by: David Woodhouse Cc: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a61986e8b691..d2d9d74f4cbb 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -339,8 +339,11 @@ static void apply_microcode(int cpu) return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); + "0x%x to 0x%x, date = %04x-%02x-%02x \n", + cpu_num, uci->rev, val[1], + uci->mc.mc_intel->hdr.date & 0xffff, + uci->mc.mc_intel->hdr.date >> 24, + (uci->mc.mc_intel->hdr.date >> 16) & 0xff); uci->rev = val[1]; } -- cgit v1.2.3 From ee2b92a8201a40021ecd1aee6f0625dc03bacc54 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:13 -0700 Subject: x86, xsave: remove the redundant access_ok() in setup_rt_frame() save_i387_xstate() is already doing the required access_ok(). Remove the redundant access_ok() before it. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 2621b98f5bf6..6c581698ab56 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -208,9 +208,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) - goto give_sigsegv; - if (save_i387_xstate(fp) < 0) err |= -1; } else -- cgit v1.2.3 From ed405958057ca6a8c4c9178a7a3b1167fabb45f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:14 -0700 Subject: x86, xsave: clear the user buffer before doing fxsave/xsave fxsave/xsave instructions will not touch all the bytes in the fxsave/xsave frame. Clear the user buffer before doing fxsave/xsave directly to user buffer during the sigcontext setup. This is essentially needed in the context of xsave(for example, some of the fields in the xsave header are not touched by the xsave and defined as must be zero). This will also present uniform and clean context to the user (from which user can safely do fxrstor/xrstor). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7415f3e38a51..bb097b1644d8 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -92,6 +92,12 @@ int save_i387_xstate(void __user *buf) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { + /* + * Start with clearing the user buffer. This will present a + * clean context for the bytes not touched by the fxsave/xsave. + */ + __clear_user(buf, sig_xstate_size); + if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); else -- cgit v1.2.3 From f65bc214e042916135256620f900e9599d65e0cb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:15 -0700 Subject: x86, xsave: use BUG_ON() instead of BUILD_BUG_ON() All these structure sizes are runtime determined. So use a runtime bug check. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bb097b1644d8..07713d64debe 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -82,8 +82,7 @@ int save_i387_xstate(void __user *buf) if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) return -EACCES; - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); + BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); -- cgit v1.2.3 From 23b49c19f6946cc33392a1fc75dd788dd4a90fb7 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Mon, 11 Aug 2008 14:55:31 -0700 Subject: x86: resurrect proper handling of maxcpus= kernel option (v2) For some reason we had two parsers registered for maxcpus=. One in init/main.c and another in arch/x86/smpboot.c. So I nuked the one in arch/x86. Also 64-bit kernels used to handle maxcpus= as documented in Documentation/cpu-hotplug.txt. CPUs with 'id > maxcpus' are initialized but not booted. 32-bit version for some reason ignored them even though all the infrastructure for booting them later is there. In the current mainline both 64 and 32 bit versions are broken. This patch restores the correct behaviour. I've tested x86_64 version on 4- and 8- way Core2 and 2-way Opteron based machines. Various config combinations SMP, !SMP, CPU_HOTPLUG, !CPU_HOTPLUG. Booted with maxcpus=1 and maxcpus=4, etc. Everything is working as expected. So far we've received two reports from different people confirming that 32-bit version also works fine, both on dual core laptops and 16way server machines. [v2: This version fixes visws breakage pointed out by Ingo.] Signed-off-by: Max Krasnyansky Cc: lizf@cn.fujitsu.com Cc: jeff.chua.linux@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 -------- arch/x86/kernel/apic_64.c | 7 ------- arch/x86/kernel/smpboot.c | 14 -------------- arch/x86/kernel/visws_quirks.c | 6 ++---- 4 files changed, 2 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 039a8d4aaf62..f88bd0d982b0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1454,8 +1454,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; @@ -1482,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7ee..446c062e831c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -90,7 +90,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ @@ -1062,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 91055d7fc1b0..e25287e4a85f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1386,17 +1386,3 @@ void __cpu_die(unsigned int cpu) BUG(); } #endif - -/* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. - */ -static int __init parse_maxcpus(char *arg) -{ - extern unsigned int maxcpus; - - if (arg) - maxcpus = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c48..594ef47f0a63 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early) return 1; } -extern unsigned int __cpuinitdata maxcpus; - /* * The Visual Workstation is Intel MP compliant in the hardware * sense, but it doesn't have a BIOS(-configuration table). @@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve) ncpus = CO_CPU_MAX; } - if (ncpus > maxcpus) - ncpus = maxcpus; + if (ncpus > setup_max_cpus) + ncpus = setup_max_cpus; #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; -- cgit v1.2.3 From a58f03b07539f6575adaa011712fa139c9343742 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 14 Aug 2008 02:16:30 -0700 Subject: x86: check bigsmp in smp_sanity_check instead of cpu_up clear bits for cpu nr > 8. This allows us to boot the full range of possible CPUs that the supported APIC model will allow. Previously we'd hang or boot up with less than 8 CPUs. Signed-off-by: Yinghai Lu Tested-by: Jeff Chua Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e25287e4a85f..a8fb8a980fae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -994,17 +994,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) flush_tlb_all(); low_mappings = 1; -#ifdef CONFIG_X86_PC - if (def_to_bigsmp && apicid > 8) { - printk(KERN_WARNING - "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); - err = -1; - } else - err = do_boot_cpu(apicid, cpu); -#else err = do_boot_cpu(apicid, cpu); -#endif zap_low_mappings(); low_mappings = 0; @@ -1058,6 +1048,34 @@ static __init void disable_smp(void) static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); + +#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) + if (def_to_bigsmp && nr_cpu_ids > 8) { + unsigned int cpu; + unsigned nr; + + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + + nr = 0; + for_each_present_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_present_map); + nr++; + } + + nr = 0; + for_each_possible_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_possible_map); + nr++; + } + + nr_cpu_ids = 8; + } +#endif + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk(KERN_WARNING "weird, boot CPU (#%d) not listed" "by the BIOS.\n", hard_smp_processor_id()); -- cgit v1.2.3 From a6825f1c1fa83b1e92b6715ee5771a4d6524d3b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Aug 2008 12:17:06 +0200 Subject: x86: hpet: workaround SB700 BIOS AMD SB700 based systems with spread spectrum enabled use a SMM based HPET emulation to provide proper frequency setting. The SMM code is initialized with the first HPET register access and takes some time to complete. During this time the config register reads 0xffffffff. We check for max. 1000 loops whether the config register reads a non 0xffffffff value to make sure that HPET is up and running before we go further. A counting loop is safe, as the HPET access takes thousands of CPU cycles. On non SB700 based machines this check is only done once and has no side effects. Based on a quirk patch from: crane cai Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334d..59fd3b6b1303 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -359,6 +359,7 @@ static int hpet_clocksource_register(void) int __init hpet_enable(void) { unsigned long id; + int i; if (!is_hpet_capable()) return 0; @@ -369,6 +370,29 @@ int __init hpet_enable(void) * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); + + /* + * AMD SB700 based systems with spread spectrum enabled use a + * SMM based HPET emulation to provide proper frequency + * setting. The SMM code is initialized with the first HPET + * register access and takes some time to complete. During + * this time the config register reads 0xffffffff. We check + * for max. 1000 loops whether the config register reads a non + * 0xffffffff value to make sure that HPET is up and running + * before we go further. A counting loop is safe, as the HPET + * access takes thousands of CPU cycles. On non SB700 based + * machines this check is only done once and has no side + * effects. + */ + for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { + if (i == 1000) { + printk(KERN_WARNING + "HPET config register value = 0xFFFFFFFF. " + "Disabling HPET\n"); + goto out_nohpet; + } + } + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; -- cgit v1.2.3 From 967060d00d7ab8e992963a966cd3d18156c02d55 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Aug 2008 15:43:33 -0700 Subject: x86, msr: fix NULL pointer deref due to msr_open on nonexistent CPUs msr_open tests for someone trying to open a device for a nonexistent CPU. However, the function always returns 0, not ret like it should, hence userspace can BUG the kernel trivially. This bug was introduced by the cdev lock_kernel pushdown patch last May. The BUG can be reproduced with these commands: # mknod fubar c 202 8 <-- pick a number less than NR_CPUS that is not the number of an online CPU # cat fubar Signed-off-by: Darrick J. Wong Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9fd809552447..e43938086885 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file) ret = -EIO; /* MSR not supported */ out: unlock_kernel(); - return 0; + return ret; } /* -- cgit v1.2.3 From ed4e5ec177d20504c51aebb93db12d57716cde9c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - use SET_APIC_DEST_FIELD instead of hardcoded shift Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 69a876be506f..77c5e5eb820e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,7 +150,7 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -- cgit v1.2.3 From 36fef0949c14445d231a68663e8a01860f7caca3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - unify disable_apic_timer Get rid of local_apic_timer_disabled and use disable_apic_timer instead. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++----- arch/x86/kernel/apic_64.c | 16 +++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 12b154822bce..6af20dd12c96 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -63,7 +63,7 @@ int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -574,7 +574,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (local_apic_timer_disabled) { + if (disable_apic_timer) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -1699,12 +1699,19 @@ static int __init parse_nolapic(char *arg) } early_param("nolapic", parse_nolapic); -static int __init parse_disable_lapic_timer(char *arg) +static int __init parse_disable_apic_timer(char *arg) { - local_apic_timer_disabled = 1; + disable_apic_timer = 1; return 0; } -early_param("nolapic_timer", parse_disable_lapic_timer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 77c5e5eb820e..1e925be861e4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -45,6 +45,7 @@ #include #include +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -1583,14 +1584,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static __init int setup_noapictimer(char *str) +static int __init parse_disable_apic_timer(char *arg) { - if (str[0] != ' ' && str[0] != 0) - return 0; disable_apic_timer = 1; - return 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; } -__setup("noapictimer", setup_noapictimer); +early_param("nolapic_timer", parse_nolapic_timer); static __init int setup_apicpmtimer(char *s) { -- cgit v1.2.3 From f07f4f9046121ac803bc2f0ded3d77b7c2ab481b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - unify __setup_APIC_LVTT To be able to unify this function we RE-introduce APIC_DIVISOR for 64bit mode. This snipped was eliminated in some time ago in a sake of clenup but now we need it again since it allow up to get rid of #ifdef(s). And lapic_is_integrated call is added in apic_64.c but since we always have APIC integrated on 64bit cpu compiler will ignore this call. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e925be861e4..ac399d0f65c0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,9 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* Clock divisor is set to 1 */ +#define APIC_DIVISOR 1 + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -262,6 +265,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -276,7 +282,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -446,7 +452,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = result / HZ; + calibration_result = (result * APIC_DIVISOR) / HZ; /* * Do a sanity check on the APIC calibration result -- cgit v1.2.3 From 9ce122c6e55c44ae9a4c4c777579b87d83e7f898 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - do not clear APIC twice in lapic_shutdown There is no need to clear APIC twice since disable_local_APIC will clear it anyway. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6af20dd12c96..a151d66f948c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -830,10 +830,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); - clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); + else + clear_local_APIC(); local_irq_restore(flags); } -- cgit v1.2.3 From 64e474d168e3cf31e44890b4947644d361f84e43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - get rid of local_apic_timer_verify_ok We are able to use clock_event_device as it's done in 64bit apic code so lets get rid of local_apic_timer_verify_ok variable. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a151d66f948c..7783c113be24 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -60,8 +60,6 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -301,7 +299,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned int v; /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); @@ -514,7 +512,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - local_apic_timer_verify_ok = 1; + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -548,11 +546,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - local_apic_timer_verify_ok = 0; + levt->features |= CLOCK_EVT_FEAT_DUMMY; } else local_irq_enable(); - if (!local_apic_timer_verify_ok) { + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; -- cgit v1.2.3 From c93baa1ae51cdba25a5f5ad37b2348e700e75daf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - unify verify_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7783c113be24..7ef7c782a42d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -878,6 +878,12 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; /* * The next two are just to see if we have sane values. -- cgit v1.2.3 From 296cb9511dcc3895fda84d0cd5b411bd926e4bb3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:23 +0200 Subject: x86: apic - unify sync_Arb_IDs Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +-- arch/x86/kernel/apic_64.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7ef7c782a42d..d07488993ee7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -915,8 +915,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, - APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac399d0f65c0..41aff3460635 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -742,8 +742,11 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - if (modern_apic()) + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* @@ -752,8 +755,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 519c31bacf78a969efa8d2e55ed8862848f28590 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:15 +0200 Subject: x86, AMD IOMMU: use status bit instead of memory write-back for completion wait Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 22d7d050905d..028e945c68ad 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -101,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret; + int ret, ready = 0; + unsigned status = 0; struct iommu_cmd cmd; - volatile u64 ready = 0; - unsigned long ready_phys = virt_to_phys(&ready); unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = upper_32_bits(ready_phys); - cmd.data[2] = 1; /* value written to 'ready' */ + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); iommu->need_sync = 0; @@ -122,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu) while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; - cpu_relax(); + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; } + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); -- cgit v1.2.3 From 9f5f5fb35d2934fe7dc0cb019854a030efd10cd7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:16 +0200 Subject: x86, AMD IOMMU: initialize device table properly This patch adds device table initializations which forbids memory accesses for devices per default and disables all page faults. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index d9a9da597e79..ceba33811537 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -800,6 +800,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table) return 0; } +/* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table(void) +{ + u16 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -931,6 +946,9 @@ int __init amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto free; + /* init the device table */ + init_device_table(); + /* * let all alias entries point to itself */ -- cgit v1.2.3 From 8a456695c5020d6317f9c7af190999e9414b0d3e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:17 +0200 Subject: x86m AMD IOMMU: cleanup: replace LOW_U32 macro with generic lower_32_bits Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 028e945c68ad..de39e1f2ede5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, address &= PAGE_MASK; CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; - cmd.data[2] = LOW_U32(address); + cmd.data[2] = lower_32_bits(address); cmd.data[3] = upper_32_bits(address); if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; -- cgit v1.2.3 From 129d6aba444d1e99d4cbfb9866a4652912426b65 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:18 +0200 Subject: x86, AMD IOMMU: initialize dma_ops after sysfs registration If sysfs registration fails all memory used by IOMMU is freed. This happens after dma_ops initialization and the functions will access the freed memory then. Fix this by initializing dma_ops after the sysfs registration. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index ceba33811537..a69cc0f52042 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -972,15 +972,15 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; - ret = amd_iommu_init_dma_ops(); + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; - ret = sysdev_class_register(&amd_iommu_sysdev_class); + ret = sysdev_register(&device_amd_iommu); if (ret) goto free; - ret = sysdev_register(&device_amd_iommu); + ret = amd_iommu_init_dma_ops(); if (ret) goto free; -- cgit v1.2.3 From dcc984166870150709f0c645b521a47becd9a047 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 14 Aug 2008 16:32:15 -0400 Subject: x86, perfctr: don't use CCCR_OVF_PMI1 on Pentium 4Ds Currently, setup_p4_watchdog() use CCCR_OVF_PMI1 to enable the counter overflow interrupts to the second logical core. But this bit doesn't work on Pentium 4 Ds (model 4, stepping 4) and this patch avoids its use on these processors. Tested on 4 different machines that have this specific model with success. Signed-off-by: Aristeu Rozanski Cc: jvillalovos@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index de7439f82b92..05cc22dbd4ff 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P4_IQ_PERFCTR1; evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); } evntsel = P4_ESCR_EVENT_SELECT(0x3F) -- cgit v1.2.3 From 394a15051c33f2b18e72f42283b36a9388fa414b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 14 Aug 2008 09:11:26 -0500 Subject: x86: invalidate caches before going into suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU core is shut down, all of its caches need to be flushed to prevent stale data from causing errors if the core is resumed. Current Linux suspend code performs an assignment after the flush, which can add dirty data back to the cache.  On some AMD platforms, additional speculative reads have caused crashes on resume because of this dirty data. Relocate the cache flush to be the very last thing done before halting.  Tie into an assembly line so the compile will not reorder it.  Add some documentation explaining what is going on and why we're doing this. Signed-off-by: Mark Langsdorf Acked-by: Mark Borden Acked-by: Michael Hohmuth Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 5 ++--- arch/x86/kernel/process_64.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 53bc653ed5ca..3b7a1ddcc0bc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -95,7 +95,6 @@ static inline void play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; @@ -104,8 +103,8 @@ static inline void play_dead(void) * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7d9a16..71553b664e2a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) -- cgit v1.2.3 From 15636668449d4135ac77a79715ba430a81aed16d Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Fri, 15 Aug 2008 08:36:14 -0400 Subject: x86, NMI: fix watchdog failure message > it just won't work at boot time - the second logic unit will be stuck: > > Booting processor 1/2 APIC 0x1 > Initializing CPU#1 > Calibrating delay using timer specific routine.. 5586.12 BogoMIPS (lpj=2793063) > CPU: Trace cache: 12K uops, L1 D cache: 16K > CPU: L2 cache: 1024K > CPU: Physical Processor ID: 0 > CPU: Processor Core ID: 1 > CPU1: Thermal monitoring enabled (TM1) > Intel(R) Pentium(R) D CPU 2.80GHz stepping 04 > Brought up 2 CPUs > testing NMI watchdog ... <4>WARNING: CPU#1: NMI appears to be stuck (0->0)! while at it... - fix that newline Signed-off-by: Aristeu Rozanski Cc: jvillalo@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac6d51222e7d..919473ad4a29 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -142,11 +142,15 @@ int __init check_nmi_watchdog(void) if (!per_cpu(wd_enabled, cpu)) continue; if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + printk("\n"); printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + printk(KERN_WARNING "Please report this to " + "linux-kernel@vger.kernel.org and attach " + "the output of 'dmesg' command.\n"); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } -- cgit v1.2.3 From 8bb851900f5d0a79d3fddac808cc670d9894ef67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Aug 2008 15:34:32 +0200 Subject: x86, nmi: clean UP NMI watchdog failure message clean up the failure message - and redirect people to bugzilla instead of lkml. Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 919473ad4a29..abb78a2cc4ad 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) } #endif +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); +} + int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; @@ -141,19 +158,8 @@ int __init check_nmi_watchdog(void) for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("\n"); - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - printk(KERN_WARNING "Please report this to " - "linux-kernel@vger.kernel.org and attach " - "the output of 'dmesg' command.\n"); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); } endflag = 1; if (!atomic_read(&nmi_active)) { -- cgit v1.2.3 From 516cbf3730c49739629d66313b20bdc50c98aa2c Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Tue, 12 Aug 2008 12:52:36 -0700 Subject: x86, bootup: add built-in kernel command line for x86 (v2) Allow x86 to support a built-in kernel command line. The built-in command line can override the one provided by the boot loader, for those cases where the boot loader is broken or it is difficult to change the command line in the the boot loader. H. Peter Anvin wrote: > Ingo Molnar wrote: >> Best would be to make it really apparent in the code that nothing >> changes if this config option is not set. Preferably there should be >> no extra code at all in that case. >> > > I would like to see this: [...Nested ifdefs...] OK. This version changes absolutely nothing if CONFIG_CMDLINE_BOOL is not set (the default). Also, no space is appended even when CONFIG_CMDLINE_BOOL is set, but the builtin string is empty. This is less sloppy all the way around, IMHO. Note that I use the same option names as on other arches for this feature. [ mingo@elte.hu: build fix ] Signed-off-by: Tim Bird Cc: Matt Mackall Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3fbcbd..2f31cddd27b7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -223,6 +223,9 @@ unsigned long saved_video_mode; #define RAMDISK_LOAD_FLAG 0x4000 static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -673,6 +676,19 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -- cgit v1.2.3 From 2bd455dbfebfd632a8dcf1d3d1612737986fde0a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 4 Aug 2008 11:26:38 +0800 Subject: x86: remove nesting CONFIG_HOTPLUG_CPU prefill_possible_map() is defined inside CONFIG_HOTPLUG_CPU, so the nesting CONFIG_HOTPLUG_CPU is just redundant. Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4f..d5e19c362046 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1271,16 +1271,13 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; -#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } -#else - additional_cpus = 0; -#endif + possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; -- cgit v1.2.3 From 9744f5a32853642f8ed0749a1c9ed8cf9c9c9dc4 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 3 Aug 2008 19:25:48 +0200 Subject: x86, acpi: cleanup, temp_stack is used only when CONFIG_SMP is set fix: arch/x86/kernel/acpi/sleep.c:24: warning: 'temp_stack' defined but not used [ Sven Wegener : fix build bug ] Signed-off-by: Marcin Slusarz Acked-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fa2161d5003b..81e5ab6542d8 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; /* address in low memory of the wakeup routine. */ static unsigned long acpi_realmode; -#ifdef CONFIG_64BIT +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) static char temp_stack[10240]; #endif -- cgit v1.2.3 From 2070dae10f50ec244f58292436ace9a3f9dc1d71 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:24:06 +0200 Subject: x86: coding style fixes to arch/x86/kernel/bios_uv.c paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/bios_uv.o.* 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.after 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd55391c..fdd585f9c53d 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,11 +25,11 @@ x86_bios_strerror(long status) { const char *str; switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; + case 0: str = "Call completed without error"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + default: str = "Unknown BIOS status code"; break; } return str; } -- cgit v1.2.3 From d9336a9b47d57db835453968efbd0d5cedfe0260 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:25:43 +0200 Subject: x86: coding style fixes to arch/x86/kernel/paravirt_patch_32.c Before: total: 3 errors, 1 warnings, 49 lines checked After: total: 2 errors, 1 warnings, 49 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/paravirt_patch_32.o.* a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.after a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt_patch_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 58262218781b..9fe644f4861d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, start = start_##ops##_##x; \ end = end_##ops##_##x; \ goto patch_site - switch(type) { + switch (type) { PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, restore_fl); -- cgit v1.2.3 From 0d5cdc97e242a5589e5dca23277675f4b4482490 Mon Sep 17 00:00:00 2001 From: Jens Rottmann Date: Mon, 4 Aug 2008 14:40:16 +0200 Subject: x86, geode-mfgpt: check IRQ before using MFGPT as clocksource Adds a simple IRQ autodetection to the AMD Geode MFGPT driver, and more importantly, adds some checks, if IRQs can actually be received on the chosen line. This fixes cases where MFGPT is selected as clocksource though not producing any ticks, so the kernel simply starves during boot. Signed-off-by: Jens Rottmann Cc: Andres Salomon Cc: linux-geode@bombadil.infradead.org Cc: Jordan Crouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/mfgpt_32.c | 52 +++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f488..3b599518c322 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -33,6 +33,8 @@ #include #include +#define MFGPT_DEFAULT_IRQ 7 + static struct mfgpt_timer_t { unsigned int avail:1; } mfgpt_timers[MFGPT_MAX_TIMERS]; @@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) } EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); -int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) +int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) { - u32 val, dummy; - int offset; + u32 zsel, lpc, dummy; + int shift; if (timer < 0 || timer >= MFGPT_MAX_TIMERS) return -EIO; - if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) return -EIO; - rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = MFGPT_DEFAULT_IRQ; - offset = (timer % 4) * 4; - - val &= ~((0xF << offset) | (0xF << (offset + 16))); + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + /* All chosen and checked - go for it */ + if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; if (enable) { - val |= (irq & 0x0F) << (offset); - val |= (irq & 0x0F) << (offset + 16); + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); } - wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); return 0; } @@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; static u16 mfgpt_event_clock; -static int irq = 7; +static int irq; static int __init mfgpt_setup(char *str) { get_option(&str, &irq); @@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) mfgpt_event_clock = timer; /* Set up the IRQ on the MFGPT side */ - if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { + if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); return -EIO; } @@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) &mfgpt_clockevent); printk(KERN_INFO - "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); + "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", + timer, irq); clockevents_register_device(&mfgpt_clockevent); return 0; err: - geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); + geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); printk(KERN_ERR "mfgpt-timer: Unable to set up the MFGPT clock source\n"); return -EIO; -- cgit v1.2.3 From d33dcb9e7d272cc3171dcc3a21049902185ab03d Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:45 +0200 Subject: x86: Microcode patch loader style corrections Style corrections to main microcode module. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 28dba1a6e4aa..39961bb83293 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/*#define DEBUG pr_debug */ #include #include #include @@ -116,7 +116,7 @@ EXPORT_SYMBOL_GPL(user_buffer); unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); -static int do_microcode_update (void) +static int do_microcode_update(void) { long cursor = 0; int error = 0; @@ -158,18 +158,20 @@ out: return error; } -static int microcode_open (struct inode *unused1, struct file *unused2) +static int microcode_open(struct inode *unused1, struct file *unused2) { cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { ssize_t ret; if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", + num_physpages); return -EINVAL; } @@ -201,7 +203,7 @@ static struct miscdevice microcode_dev = { .fops = µcode_fops, }; -static int __init microcode_dev_init (void) +static int __init microcode_dev_init(void) { int error; @@ -216,7 +218,7 @@ static int __init microcode_dev_init (void) return 0; } -static void microcode_dev_exit (void) +static void microcode_dev_exit(void) { misc_deregister(µcode_dev); } @@ -224,7 +226,7 @@ static void microcode_dev_exit (void) MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #else #define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) +#define microcode_dev_exit() do { } while (0) #endif /* fake device for request_firmware */ @@ -451,7 +453,7 @@ int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -void __exit microcode_exit (void) +void __exit microcode_exit(void) { microcode_dev_exit(); -- cgit v1.2.3 From 636a31781684d0f49208aa163f1a5a3a74210eb4 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:46 +0200 Subject: x86: Fixed NULL function pointer dereference in AMD microcode patch loader. Dereference took place in code part responsible for manual installation of microcode patches through /dev/cpu/microcode. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 39961bb83293..ad136ad99cb3 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -127,7 +127,8 @@ static int do_microcode_update(void) old = current->cpus_allowed; while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_ops->microcode_sanity_check(new_mc); + if (microcode_ops->microcode_sanity_check != NULL) + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* -- cgit v1.2.3 From c9c3dddd8f9a05b25d4ce53e8e80cc0ea1759d18 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Aug 2008 03:51:38 +0400 Subject: x86_64: remove empty lines from stack traces/oopses Signed-off-by: Alexey Dobriyan Cc: ak@suse.de Cc: akpm@osdl.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420c..8b5b3b81d437 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -339,9 +339,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -386,6 +385,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -443,7 +443,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); -- cgit v1.2.3 From 66d4bdf22b8652cda215e2653c8bbec7a767ed57 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 31 Jul 2008 16:48:31 +0100 Subject: x86-64: fix overlap of modules and fixmap areas Plus add a build time check so this doesn't go unnoticed again. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1b318e903bf6..9bfc4d72fb2e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); -- cgit v1.2.3 From 163f6876f5c3ff8215e900b93779e960a56b3694 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:22 -0700 Subject: kexec jump: rename KEXEC_CONTROL_CODE_SIZE to KEXEC_CONTROL_PAGE_SIZE Rename KEXEC_CONTROL_CODE_SIZE to KEXEC_CONTROL_PAGE_SIZE, because control page is used for not only code on some platform. For example in kexec jump, it is used for data and stack too. [akpm@linux-foundation.org: unbreak powerpc and arm, finish conversion] Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Cc: Vivek Goyal Cc: Ingo Molnar Cc: Russell King Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 9fe478d98406..466450167dea 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -78,7 +78,7 @@ static void load_segments(void) /* * A architecture hook called to validate the * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE * have been allocated, but the segments have yet * been copied into the kernel. * -- cgit v1.2.3 From fb45daa69d287b394eca1619b3fadff7c0215c71 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:23 -0700 Subject: kexec jump: check code size in control page Kexec/Kexec-jump require code size in control page is less than PAGE_SIZE/2. This patch add link-time checking for this. ASSERT() of ld link script is used as the link-time checking mechanism. [akpm@linux-foundation.org: build fix] Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Acked-by: Vivek Goyal Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 2 +- arch/x86/kernel/relocate_kernel_32.S | 10 +++++++--- arch/x86/kernel/vmlinux_32.lds.S | 8 ++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 466450167dea..5c8e7735c896 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -138,7 +138,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE/2); + memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 703310a99023..6f50664b2ba5 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,10 +20,11 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) -/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are - * used to save some data for jumping back +/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back */ -#define DATA(offset) (PAGE_SIZE/2+(offset)) +#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) /* Minimal CPU state */ #define ESP DATA(0x0) @@ -376,3 +377,6 @@ swap_pages: popl %ebx popl %ebp ret + + .globl kexec_control_code_size +.set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d2..af5bdad84604 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -209,3 +209,11 @@ SECTIONS DWARF_DEBUG } + +#ifdef CONFIG_KEXEC +/* Link time checks */ +#include + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif -- cgit v1.2.3 From 3122c331190e9d1622bf1c8cf6ce3b17cca67c9e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:26 -0700 Subject: kexec jump: fix for ftrace Ftrace depends on some processor state that we destroyed during kexec and restored by restore_processor_state(). So save_processor_state() and restore_processor_state() are moved into machine_kexec() and ftrace is restored after restore_processor_state(). Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Cc: Vivek Goyal Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5c8e7735c896..0732adba05ca 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + int save_ftrace_enabled; asmlinkage unsigned long (*relocate_kernel_ptr)(unsigned long indirection_page, unsigned long control_page, @@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image) unsigned int has_pae, unsigned int preserve_context); - tracer_disable(); +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + save_processor_state(); +#endif + + save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); @@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image) (unsigned long)page_list, image->start, cpu_has_pae, image->preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + restore_processor_state(); +#endif + + __ftrace_enabled_restore(save_ftrace_enabled); } void arch_crash_save_vmcoreinfo(void) -- cgit v1.2.3 From 48e2bd56b1d1ae4b95fb21be778927b64d5c4235 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 2 Aug 2008 12:50:37 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix coding style of traps_64.c with improvements suggested by Ingo. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index fe36d96ba70b..5df5d2e97ece 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -84,8 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? - "" : "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", + address, reliable ? "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,8 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / - EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -363,8 +363,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - /* debugging aid: "show_stack(NULL, NULL);" prints the - back trace for this cpu. */ + /* + * debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu. + */ if (sp == NULL) { if (task) -- cgit v1.2.3 From fc0091b3c86396afc8e6c273aff21671cf882ee1 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 15 Aug 2008 17:21:14 +0100 Subject: x86: change init_gdt to update the gdt via write_gdt, rather than a direct write. By writing directly, a memory access violation can occur whilst hotplugging a CPU if the entry was previously marked read-only. Signed-off-by: Alex Nixon Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpcommon.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca0..397e309839dd 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -8,18 +8,21 @@ DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. */ +/* + * Initialize the CPU's GDT. This is either the boot CPU doing itself + * (still using the master per-cpu area), or a CPU doing it for a + * secondary which will soon come up. + */ __cpuinit void init_gdt(int cpu) { - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct gdt; - pack_descriptor(&gdt[GDT_ENTRY_PERCPU], - __per_cpu_offset[cpu], 0xFFFFF, + pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; - gdt[GDT_ENTRY_PERCPU].s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; -- cgit v1.2.3 From f88f07e0f0fd6376e081b10930d272a08fbf082f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 14 Aug 2008 16:58:15 -0400 Subject: x86: alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a kernel thread is preempted in single-cpu mode right after the NOP (nop about to be turned into a lock prefix), then we CPU hotplug a CPU, and then the thread is scheduled back again, a SMP-unsafe atomic operation will be used on shared SMP variables, leading to corruption. No corruption would happen in the reverse case : going from SMP to UP is ok because we split a bit instruction into tiny pieces, which does not present this condition. Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment override prefix should fix this issue. Since the default of the atomic instructions is to use the DS segment anyway, it should not affect the behavior. The exception to this are references that use ESP/RSP and EBP/RBP as the base register (they will use the SS segment), however, in Linux (a) DS == SS at all times, and (b) we do not distinguish between segment violations reported as #SS as opposed to #GP, so there is no need to disassemble the instruction to figure out the suitable segment. This patch assumes that the 0x3E prefix will leave atomic operations as-is (thus assuming they normally touch data in the DS segment). Since there seem to be no obvious ill-use of other segment override prefixes for atomic operations, it should be safe. It can be verified with a quick grep -r LOCK_PREFIX include/asm-x86/ grep -A 1 -r LOCK_PREFIX arch/x86/ Taken from This source : AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions States "Instructions that Reference a Non-Stack Segment—If an instruction encoding references any base register other than rBP or rSP, or if an instruction contains an immediate offset, the default segment is the data segment (DS). These instructions can use the segment-override prefix to select one of the non-default segments, as shown in Table 1-5." Therefore, forcing the DS segment on the atomic operations, which already use the DS segment, should not change. This source : http://wiki.osdev.org/X86_Instruction_Encoding States "In 64-bit the CS, SS, DS and ES segment overrides are ignored." Confirmed by "AMD 64-Bit Technology" A.7 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/x86-64_overview.pdf "In 64-bit mode, the DS, ES, SS and CS segment-override prefixes have no effect. These four prefixes are no longer treated as segment-override prefixes in the context of multipleprefix rules. Instead, they are treated as null prefixes." This patch applies to 2.6.27-rc2, but would also have to be applied to earlier kernels (2.6.26, 2.6.25, ...). Performance impact of the fix : tests done on "xaddq" and "xaddl" shows it actually improves performances on Intel Xeon, AMD64, Pentium M. It does not change the performance on Pentium II, Pentium 3 and Pentium 4. Xeon E5405 2.0GHz : NR_TESTS 10000000 test empty cycles : 162207948 test test 1-byte nop xadd cycles : 170755422 test test DS override prefix xadd cycles : 170000118 * test test LOCK xadd cycles : 472012134 AMD64 2.0GHz : NR_TESTS 10000000 test empty cycles : 146674549 test test 1-byte nop xadd cycles : 150273860 test test DS override prefix xadd cycles : 149982382 * test test LOCK xadd cycles : 270000690 Pentium 4 3.0GHz NR_TESTS 10000000 test empty cycles : 290001195 test test 1-byte nop xadd cycles : 310000560 test test DS override prefix xadd cycles : 310000575 * test test LOCK xadd cycles : 1050103740 Pentium M 2.0GHz NR_TESTS 10000000 test empty cycles : 180000523 test test 1-byte nop xadd cycles : 320000345 test test DS override prefix xadd cycles : 310000374 * test test LOCK xadd cycles : 480000357 Pentium 3 550MHz NR_TESTS 10000000 test empty cycles : 510000231 test test 1-byte nop xadd cycles : 620000128 test test DS override prefix xadd cycles : 620000110 * test test LOCK xadd cycles : 800000088 Pentium II 350MHz NR_TESTS 10000000 test empty cycles : 200833494 test test 1-byte nop xadd cycles : 340000130 test test DS override prefix xadd cycles : 340000126 * test test LOCK xadd cycles : 530000078 Speed test modules can be found at http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed-32.c http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed.c Macro-benchmarks 2.0GHz E5405 Core 2 dual Quad-Core Xeon Summary * replace smp lock prefixes with DS segment selector prefixes no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 33.94 +/- 0.07 34.91 +/- 0.27 2.8 % hackbench 50 2.99 +/- 0.01 3.74 +/- 0.01 25.1 % * replace smp lock prefixes with 0x90 nops no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 34.16 +/- 0.32 34.91 +/- 0.27 2.2 % hackbench 50 3.00 +/- 0.01 3.74 +/- 0.01 24.7 % Detail : 1 CPU, replace smp lock prefixes with DS segment selector prefixes make -j1 kernel/ real 0m34.067s user 0m30.630s sys 0m2.980s real 0m33.867s user 0m30.582s sys 0m3.024s real 0m33.939s user 0m30.738s sys 0m2.876s real 0m33.913s user 0m30.806s sys 0m2.808s avg : 33.94s std. dev. : 0.07s hackbench 50 Time: 2.978 Time: 2.982 Time: 3.010 Time: 2.984 Time: 2.982 avg : 2.99 std. dev. : 0.01 1 CPU, noreplace-smp make -j1 kernel/ real 0m35.326s user 0m30.630s sys 0m3.260s real 0m34.325s user 0m30.802s sys 0m3.084s real 0m35.568s user 0m30.722s sys 0m3.168s real 0m34.435s user 0m30.886s sys 0m2.996s avg.: 34.91s std. dev. : 0.27s hackbench 50 Time: 3.733 Time: 3.750 Time: 3.761 Time: 3.737 Time: 3.741 avg : 3.74 std. dev. : 0.01 1 CPU, replace smp lock prefixes with 0x90 nops make -j1 kernel/ real 0m34.139s user 0m30.782s sys 0m2.820s real 0m34.010s user 0m30.630s sys 0m2.976s real 0m34.777s user 0m30.658s sys 0m2.916s real 0m33.924s user 0m30.634s sys 0m2.924s real 0m33.962s user 0m30.774s sys 0m2.800s real 0m34.141s user 0m30.770s sys 0m2.828s avg : 34.16 std. dev. : 0.32 hackbench 50 Time: 2.999 Time: 2.994 Time: 3.004 Time: 2.991 Time: 2.988 avg : 3.00 std. dev. : 0.01 I did more runs (20 runs of each) to compare the nop case to the DS prefix case. Results in seconds. They actually does not seems to show a significant difference. NOP 34.155 33.955 34.012 35.299 35.679 34.141 33.995 35.016 34.254 33.957 33.957 34.008 35.013 34.494 33.893 34.295 34.314 34.854 33.991 34.132 DS 34.080 34.304 34.374 35.095 34.291 34.135 33.940 34.208 35.276 34.288 33.861 33.898 34.610 34.709 33.851 34.256 35.161 34.283 33.865 35.078 Used http://www.graphpad.com/quickcalcs/ttest1.cfm?Format=C to do the T-test (yeah, I'm lazy) : Group Group One (DS prefix) Group Two (nops) Mean 34.37815 34.37070 SD 0.46108 0.51905 SEM 0.10310 0.11606 N 20 20 P value and statistical significance: The two-tailed P value equals 0.9620 By conventional criteria, this difference is considered to be not statistically significant. Confidence interval: The mean of Group One minus Group Two equals 0.00745 95% confidence interval of this difference: From -0.30682 to 0.32172 Intermediate values used in calculations: t = 0.0480 df = 38 standard error of difference = 0.155 So, unless these calculus are completely bogus, the difference between the nop and the DS case seems not to be statistically significant. Signed-off-by: Mathieu Desnoyers Acked-by: H. Peter Anvin CC: Linus Torvalds CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..7ead11f3732d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -241,25 +241,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; - char insn[1]; if (noreplace_smp) return; - add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - text_poke(*ptr, insn, 1); + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; } -- cgit v1.2.3 From 6f6da97faf29f87b3980a6992fb8cab44b4c444d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:19 +0400 Subject: x86: apic - sync_Arb_IDs style fixup No changes on binary level Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d07488993ee7..60575c05151a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -909,13 +909,15 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; + /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 41aff3460635..72e94ab0e364 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -755,7 +755,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 638c0411922540deaf8797cacf73513b17618405 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:18 +0400 Subject: x86: apic - unify init_bsp_APIC - remove redundant read of APIC_LVR register in 64bit mode - APIC is always integrated for 64bit mode so gcc will eliminate lapic_is_integrated call Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60575c05151a..16d9721788c1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -925,7 +925,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned long value; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -946,11 +946,13 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else +#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 72e94ab0e364..99d18b8976a5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -773,8 +773,6 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; - value = apic_read(APIC_LVR); - /* * Do not trust the local APIC being empty at bootup. */ @@ -786,7 +784,15 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - value |= APIC_SPIV_FOCUS_DISABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -795,6 +801,8 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } -- cgit v1.2.3 From 6764014bc8bb4849f6a4f336477e873ad5861ed2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:50 +0400 Subject: x86: apic - unify clear_local_APIC - Remove redundant masking of APIC_LVTTHMR register in apic_32.c - Add masking of APIC_LVTTHMR register to apic_64.c. We use a bit complicated #ifdef here: CONFIG_X86_MCE_P4THERMAL is 32bit specific and X86_MCE_INTEL is 64bit specific so the appropriate config variable will be set by Kconfig. - the APIC_ESR register clearing in apic_64.c now uses not straightforward way but this is allowed tradeoff. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +----- arch/x86/kernel/apic_64.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 16d9721788c1..3131603a4d6a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -754,7 +754,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -771,10 +771,6 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 99d18b8976a5..d834b7583624 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -630,6 +630,13 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif /* * Clean APIC state for other OSs: */ @@ -640,8 +647,14 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /** -- cgit v1.2.3 From 92206c909ad1d4f9c355b4842722d5a355d6b76b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:51 +0400 Subject: x86: apic - unify lapic_resume Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 29 ++++++++++++++++++----------- arch/x86/kernel/apic_64.c | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3131603a4d6a..3d40213d3af2 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1606,16 +1606,21 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1625,7 +1630,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1639,7 +1644,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d834b7583624..e542a2d08de5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1412,13 +1412,22 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - if (!x2apic) { + +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); - } else - enable_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1428,7 +1437,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1442,7 +1451,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } -- cgit v1.2.3 From 24968cfdd4c3cf61338495dd0f9bb8a6907d2087 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:52 +0400 Subject: x86: apic - unify lapic_suspend Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d40213d3af2..6cb8aaaf10f5 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1582,7 +1582,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index e542a2d08de5..13dea935b4eb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1390,10 +1390,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); -- cgit v1.2.3 From 274cfe5912eebe9d4e1a4c451fe617289a181fcf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:53 +0400 Subject: x86: apic - rearrange functions and comments Rearrange functions and comments to find differences easier. Also use apic_printk in setup_boot_APIC_clock for 64bit mode. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 69 ++++++++++++++++++++++++++--------------------- arch/x86/kernel/apic_64.c | 48 ++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6cb8aaaf10f5..9e8702eebd46 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -251,6 +251,9 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -279,6 +282,36 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Program the next event, relative to now */ @@ -298,7 +331,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used for broadcast ? */ + /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; @@ -680,35 +713,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - /* * Local APIC start and shutdown */ @@ -1542,6 +1546,11 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 13dea935b4eb..46523c0cc6f6 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -81,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); +/* + * The local apic timer can be used for any function which is CPU local. + */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -127,6 +130,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -175,7 +183,6 @@ static struct apic_ops xapic_ops = { }; struct apic_ops __read_mostly *apic_ops = &xapic_ops; - EXPORT_SYMBOL_GPL(apic_ops); static void x2apic_wait_icr_idle(void) @@ -244,6 +251,10 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* + * Local APIC timer + */ + /* Clock divisor is set to 1 */ #define APIC_DIVISOR 1 @@ -257,7 +268,6 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -474,10 +484,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -489,7 +499,9 @@ void __init setup_boot_APIC_clock(void) return; } - printk(KERN_INFO "Using local APIC timer interrupts.\n"); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -508,6 +520,7 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -577,6 +590,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } @@ -1248,6 +1262,13 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1347,9 +1368,11 @@ int hard_smp_processor_id(void) #ifdef CONFIG_PM static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1458,6 +1481,11 @@ static int lapic_resume(struct sys_device *dev) return 0; } +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, -- cgit v1.2.3 From 9c803869f5f5e3d7ad94b2926474b2882e30073b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:54 +0400 Subject: x86: apic - unify lapic_is_integrated Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 9e8702eebd46..41134d4e6a66 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -128,7 +128,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46523c0cc6f6..18c0b8cd2376 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -111,11 +111,15 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a seperate chip + * Check, if the APIC is integrated or a separate chip */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif } /* -- cgit v1.2.3 From cf9768d7514d59b3179a886c4a47908d72e6cf76 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:55 +0400 Subject: x86: apic - unify xapic_icr_read Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 18c0b8cd2376..5e0738131e58 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -174,7 +174,7 @@ u64 xapic_icr_read(void) icr2 = apic_read(APIC_ICR2); icr1 = apic_read(APIC_ICR); - return (icr1 | ((u64)icr2 << 32)); + return icr1 | ((u64)icr2 << 32); } static struct apic_ops xapic_ops = { -- cgit v1.2.3 From 7b22ff5344fda666e0938e5261ea7b9a3dfce497 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 18 Aug 2008 00:36:18 +0900 Subject: x86 gart: allocate size-aligned address for alloc_coherent, v2 This patch changes GART IOMMU to return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines: The cpu return address and the DMA bus master address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk which is smaller than or equal to 64 kilobytes, the extent of the buffer you receive will not cross a 64K boundary. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cdab67849074..4d8efb05428d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(struct device *dev, int size) +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; @@ -95,11 +96,12 @@ static unsigned long alloc_iommu(struct device *dev, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, align_mask); if (offset == -1) { need_flush = 1; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, + align_mask); } if (offset != -1) { next_bit = offset+size; @@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages); + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; if (iommu_page == -1) { @@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, paddr, size, dir); + dma_addr_t map; + unsigned long align_mask; + + align_mask = (1UL << get_order(size)) - 1; + map = dma_map_area(dev, paddr, size, dir, align_mask); flush_gart(); @@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = gart_map_simple(dev, paddr, size, dir); + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); return bus; } @@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages); + unsigned long iommu_start = alloc_iommu(dev, pages, 0); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; -- cgit v1.2.3 From d554d9a4295dd0595d12eeccbc55d1f495b15176 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..46af71676738 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3 From 67d0c9ebdc9f5f356657146b4e862b2d745e9e78 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:12:37 +0200 Subject: x86: fix MP_processor_info section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1fe7): Section mismatch in reference from the function MP_processor_info() to the variable .init.data:x86_quirks The function __cpuinit MP_processor_info() references a variable __initdata x86_quirks. If x86_quirks is only used by MP_processor_info then annotate x86_quirks with a matching annotation. MP_processor_info uses x86_quirks which is __init and is used only from smp_read_mpc and construct_default_ISA_mptable which are __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 678090508a62..9942fc72b63c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; -- cgit v1.2.3 From c72a5efec1193faa2ef34c0bd48d7251a70ec934 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:11:13 +0200 Subject: x86: mmconf: fix section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1591): Section mismatch in reference from the function init_amd() to the function .init.text:check_enable_amd_mmconf_dmi() The function __cpuinit init_amd() references a function __init check_enable_amd_mmconf_dmi(). If check_enable_amd_mmconf_dmi is only used by init_amd then annotate check_enable_amd_mmconf_dmi with a matching annotation. check_enable_amd_mmconf_dmi is only called from init_amd which is __cpuinit Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b366..efc2f361fe85 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } -- cgit v1.2.3 From 39e00fe20aaad4326ed5e0e3221451732bc7f679 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:09:38 +0200 Subject: x86: mpparse.c: fix section mismatch warning WARNING: vmlinux.o(.text+0x118f7): Section mismatch in reference from the function construct_ioapic_table() to the function .init.text:MP_bus_info() The function construct_ioapic_table() references the function __init MP_bus_info(). This is often because construct_ioapic_table lacks a __init annotation or the annotation of MP_bus_info is wrong. construct_ioapic_table is called only from construct_default_ISA_mptable which is __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9942fc72b63c..b3fb430725cb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) -- cgit v1.2.3 From e532c06f2a835b5cc4f4166f467437d9b09c1d0e Mon Sep 17 00:00:00 2001 From: David Fries Date: Sun, 17 Aug 2008 23:03:40 -0500 Subject: x86: fix i486 suspend to disk CR4 oops arch/x86/power/cpu_32.c __save_processor_state calls read_cr4() only a i486 CPU doesn't have the CR4 register. Trying to read it produces an invalid opcode oops during suspend to disk. Use the safe rc4 reading op instead. If the value to be written is zero the write is skipped. arch/x86/power/hibernate_asm_32.S done: swapped the use of %eax and %ecx to use jecxz for the zero test and jump over store to %cr4. restore_image: s/%ecx/%eax/ to be consistent with done: In addition to __save_processor_state, acpi_save_state_mem, efi_call_phys_prelog, and efi_call_phys_epilog had checks added (acpi restore was in assembly and already had a check for non-zero). There were other reads and writes of CR4, but MCE and virtualization shouldn't be executed on a i486 anyway. Signed-off-by: David Fries Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/efi_32.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 81e5ab6542d8..426e5d91b63a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -86,7 +86,7 @@ int acpi_save_state_mem(void) #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4(); + header->pmode_cr4 = read_cr4_safe(); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13b..5cab48ee61a4 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = @@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = -- cgit v1.2.3 From d19fbfdfe6a7034c8b6a7062365780485ab5aeaa Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 17 Aug 2008 17:50:51 +0200 Subject: x86: silence section mismatch warning - get_local_pda Take out part of get_local_pda referencing __init function (free_bootmem) to new (static) function marked as __ref. It's safe to do because free_bootmem is called before __init sections are dropped. WARNING: vmlinux.o(.cpuinit.text+0x3cd7): Section mismatch in reference from the function get_local_pda() to the function .init.text:free_bootmem() The function __cpuinit get_local_pda() references a function __init free_bootmem(). If free_bootmem is only used by get_local_pda then annotate free_bootmem with a matching annotation. Signed-off-by: Marcin Slusarz Cc: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a8fb8a980fae..e139e617f422 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -756,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work) } #ifdef CONFIG_X86_64 + +/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ +static void __ref free_bootmem_pda(struct x8664_pda *oldpda) +{ + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); +} + /* * Allocate node local memory for the AP pda. * @@ -784,8 +792,7 @@ int __cpuinit get_local_pda(int cpu) if (oldpda) { memcpy(newpda, oldpda, size); - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, size); + free_bootmem_pda(oldpda); } newpda->in_bootmem = 0; -- cgit v1.2.3 From f71066624d5d91bf179a1ea25da0800b72e20c60 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 17 Aug 2008 17:50:52 +0200 Subject: x86, calgary: fix section mismatch warning - get_tce_space_from_tar WARNING: vmlinux.o(.text+0x27032): Section mismatch in reference from the function get_tce_space_from_tar() to the function .init.text:calgary_bus_has_devices() The function get_tce_space_from_tar() references the function __init calgary_bus_has_devices(). This is often because get_tce_space_from_tar lacks a __init annotation or the annotation of calgary_bus_has_devices is wrong. get_tce_space_from_tar is called only from __init function (calgary_init) and calls __init function (calgary_bus_has_devices). So annotate it properly. Signed-off-by: Marcin Slusarz Cc: Chandru Siddalingappa Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 02d19328525d..218d783ed7a8 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) * Function for kdump case. Get the tce tables from first kernel * by reading the contents of the base adress register of calgary iommu */ -static void get_tce_space_from_tar(void) +static void __init get_tce_space_from_tar(void) { int bus; void __iomem *target; -- cgit v1.2.3 From d5e629a6f88137fb77c4cc857be5ea7c3f27110d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 17 Aug 2008 21:12:27 -0700 Subject: x86: apic - unify lapic_resume - fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e0738131e58..ad532dccd11e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1446,6 +1446,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1456,6 +1457,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From 8bfcb3960fde049b863266dab8c3617bb5a541aa Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:20 +0200 Subject: x86: make movsl_mask definition non-CPU specific movsl_mask is currently defined in arch/x86/kernel/cpu/intel.c, which contains code specific to Intel CPUs. However, movsl_mask is used in the non-CPU specific code in arch/x86/lib/usercopy_32.c, which breaks the compilation when support for Intel CPUs is compiled out. This patch solves this problem by moving movsl_mask's definition close to its users in arch/x86/lib/usercopy_32.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f2569b8f8..5c8959b8a42e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,13 +23,6 @@ #include #endif -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ -- cgit v1.2.3 From 774400a3ba23b63f4de39e67ce6c4e48935809dc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:21 +0200 Subject: x86: move cmpxchg fallbacks to a generic place arch/x86/kernel/cpu/intel.c defines a few fallback functions (cmpxchg_*()) that are used when the CPU doesn't support cmpxchg and/or cmpxchg64 natively. However, while defined in an Intel-specific file, these functions are also used for CPUs from other vendors when they don't support cmpxchg and/or cmpxchg64. This breaks the compilation when support for Intel CPUs is disabled. This patch moves these functions to a new arch/x86/kernel/cpu/cmpxchg.c file, unconditionally compiled when X86_32 is enabled. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/cmpxchg.c | 72 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 64 -------------------------------------- 3 files changed, 73 insertions(+), 65 deletions(-) create mode 100644 arch/x86/kernel/cpu/cmpxchg.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaad3001..25b4c063fbf6 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,7 +5,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o -obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 000000000000..2056ccf572cc --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5c8959b8a42e..77618c717d76 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -307,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - /* arch_initcall(intel_cpu_init); */ -- cgit v1.2.3 From 8d02c2110b3fb8e2700b31596a582a2989fd72ba Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 5 Aug 2008 11:45:19 +0200 Subject: x86: configuration options to compile out x86 CPU support code This patch adds some configuration options that allow to compile out CPU vendor-specific code in x86 kernels (in arch/x86/kernel/cpu). The new configuration options are only visible when CONFIG_EMBEDDED is selected, as they are mostly interesting for space savings reasons. An example of size saving, on x86 with only Intel CPU support: text data bss dec hex filename 1125479 118760 212992 1457231 163c4f vmlinux.old 1121355 116536 212992 1450883 162383 vmlinux -4124 -2224 0 -6348 -18CC +/- However, I'm not exactly sure that the Kconfig wording is correct with regard to !64BIT / 64BIT. [ mingo@elte.hu: convert macro to inline ] Signed-off-by: Thomas Petazzoni Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25b4c063fbf6..a0fc6c144384 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,15 +7,16 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o -obj-$(CONFIG_X86_32) += amd.o -obj-$(CONFIG_X86_64) += amd_64.o -obj-$(CONFIG_X86_32) += cyrix.o -obj-$(CONFIG_X86_32) += centaur.o -obj-$(CONFIG_X86_64) += centaur_64.o -obj-$(CONFIG_X86_32) += transmeta.o -obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_64) += intel_64.o -obj-$(CONFIG_X86_32) += umc.o + +obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o +obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -- cgit v1.2.3 From b6c8051311e1a14d229df05ea39d0a1b2ce90cdd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:49 +0400 Subject: x86: apic - rearrange maxcpu definition Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 41134d4e6a66..8d1febf05da1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -114,6 +114,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static int enabled_via_apicbase; static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; + /* * Get the LAPIC version @@ -1459,8 +1461,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ad532dccd11e..46acb9b47ae7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -98,10 +98,10 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ -- cgit v1.2.3 From f1ee37891dab6014f6b0ec77b18bc07e2369a55f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:50 +0400 Subject: x86: apic - unify setup_boot_APIC_clock Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8d1febf05da1..80db3e0426c4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -612,6 +612,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; -- cgit v1.2.3 From 990b183e58cb513a62492b6218987750e106cbfb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:51 +0400 Subject: x86: apic - unify disable_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80db3e0426c4..13c4b79441da 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -796,7 +796,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned long value; + unsigned int value; clear_local_APIC(); @@ -808,6 +808,7 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -819,6 +820,7 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46acb9b47ae7..4fb903b2fc39 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -691,6 +691,20 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif } void lapic_shutdown(void) -- cgit v1.2.3 From fe4024dcb0c01e5399394d2807406a2c13fb1eb7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:52 +0400 Subject: x86: apic - unify lapic_shutdown Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 9 ++++++--- arch/x86/kernel/apic_64.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 13c4b79441da..d4efe86adc72 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -838,10 +838,13 @@ void lapic_shutdown(void) local_irq_save(flags); - if (enabled_via_apicbase) - disable_local_APIC(); - else +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fb903b2fc39..48806546d49f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -707,6 +707,12 @@ void disable_local_APIC(void) #endif } +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ void lapic_shutdown(void) { unsigned long flags; @@ -716,7 +722,13 @@ void lapic_shutdown(void) local_irq_save(flags); - disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } -- cgit v1.2.3 From 36c9d6742897fa414f51c4e9d0f20ab4e6bf942c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:53 +0400 Subject: x86: apic - unify connect_bsp_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ arch/x86/kernel/apic_64.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d4efe86adc72..6d230e9d0a7d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1387,6 +1387,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1401,6 +1402,7 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } +#endif enable_apic_mode(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 48806546d49f..5579e213b5d2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1285,10 +1285,26 @@ asmlinkage void smp_error_interrupt(void) } /** - * * connect_bsp_APIC - attach the APIC to the interrupt system - * */ + * connect_bsp_APIC - attach the APIC to the interrupt system + */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif enable_apic_mode(); } -- cgit v1.2.3 From c43da2f5e92fe3bcc256f0c0d6cb858368da5bd9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:54 +0400 Subject: x86: apic - unify lapic_setup_esr We use 32bit code former for 64bit mode since it's much better implementation and easier to merge. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 +++++++++---------- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6d230e9d0a7d..3c1562afa274 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -983,6 +983,16 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1003,16 +1013,7 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5579e213b5d2..d74abf7e92f7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -863,6 +863,45 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ @@ -968,18 +1007,6 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned maxlvt = lapic_get_maxlvt(); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); -} - void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); -- cgit v1.2.3 From c40aaec6868401671a0ca14ed77e9b2da2d1f223 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:55 +0400 Subject: x86: apic - unify __setup_APIC_LVTT Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- arch/x86/kernel/apic_64.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3c1562afa274..65419c7d437f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -248,8 +248,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 16 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else #define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -281,8 +285,8 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d74abf7e92f7..fe57db9f3fbb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -259,8 +259,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 1 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 #define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -291,9 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -- cgit v1.2.3 From c177b0bc03e0e11623e2099db42903fb0caf0fd3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:56 +0400 Subject: x86: apic - unify disconnect_bsp_APIC - just #ifdef added Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 66 ++++++++++++++++++++++++----------------------- arch/x86/kernel/apic_64.c | 23 +++++++++++++++-- 2 files changed, 55 insertions(+), 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65419c7d437f..3095bb71eaed 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1420,6 +1420,7 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1431,47 +1432,48 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + return; + } +#endif - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + /* Go back to Virtual Wire compatibility mode */ + unsigned int value; - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled + * For LVT0 make it edge triggered, active high, + * external and enabled */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fe57db9f3fbb..0d969103fce7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1348,8 +1348,24 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1375,7 +1391,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* For LVT1 make it edge triggered, active high, nmi and enabled */ + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -- cgit v1.2.3 From 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:57 +0400 Subject: x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 ++++++++------- arch/x86/kernel/apic_64.c | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3095bb71eaed..c3a252b1a8be 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1480,7 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - physid_mask_t phys_cpu; /* * Validate version @@ -1493,9 +1492,6 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1512,17 +1508,19 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - if (apicid == boot_cpu_physical_apicid) + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - + } if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1542,7 +1540,9 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#ifdef CONFIG_SMP +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1555,6 +1555,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0d969103fce7..76c20773bedf 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1215,6 +1215,8 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1409,15 +1411,26 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -1437,6 +1450,29 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1448,6 +1484,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } +#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); -- cgit v1.2.3 From fa6b95fc7c6f2f3eb1560e1f33cd13197546c5a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:58 +0400 Subject: x86: apic - unify end_local_APIC_setup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++-- arch/x86/kernel/apic_64.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c3a252b1a8be..f1882329b9cf 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1155,13 +1155,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - unsigned long value; - lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, value); +#endif setup_apic_nmi_watchdog(NULL); apic_pm_activate(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 76c20773bedf..eec10b34dc49 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1014,6 +1014,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); +#endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } -- cgit v1.2.3 From 0b23e8cf553f5e706b0057363f1319867bcd1a7d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:59 +0400 Subject: x86: apic - unify local_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f1882329b9cf..af227bce23b1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -685,7 +685,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eec10b34dc49..a9ad2cb4ff95 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -567,7 +567,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } -- cgit v1.2.3 From 79af9bec60e6e218a1e48e8830d603d64a7fc441 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:00 +0400 Subject: x86: apic - unify apic_set_verbosity Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++++--- arch/x86/kernel/apic_64.c | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index af227bce23b1..acbc4dea2ee8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1768,13 +1768,24 @@ early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); static int __init apic_set_verbosity(char *arg) { - if (!arg) + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif return -EINVAL; + } - if (strcmp(arg, "debug") == 0) + if (strcmp("debug", arg) == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp(arg, "verbose") == 0) + else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a9ad2cb4ff95..999781810c09 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,27 +1759,6 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static __init int setup_disableapic(char *str) { disable_apic = 1; @@ -1824,6 +1803,31 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + static int __init lapic_insert_resource(void) { if (!apic_phys) -- cgit v1.2.3 From 789fa735712d726b5cfa5c6be57171b5637a4872 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:01 +0400 Subject: x86: apic - unify disableapic and nolapic setup handlers Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 11 +++++++++-- arch/x86/kernel/apic_64.c | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index acbc4dea2ee8..5680bda62f78 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1737,13 +1737,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init parse_nolapic(char *arg) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("nolapic", parse_nolapic); +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); static int __init parse_disable_apic_timer(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 999781810c09..7a317180ba2e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,7 +1759,7 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static __init int setup_disableapic(char *str) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1768,9 +1768,9 @@ static __init int setup_disableapic(char *str) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static __init int setup_nolapic(char *str) +static int __init setup_nolapic(char *arg) { - return setup_disableapic(str); + return setup_disableapic(arg); } early_param("nolapic", setup_nolapic); -- cgit v1.2.3 From 3415610b8e38b26b52a430b8846665c2d6bb3558 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:02 +0400 Subject: x86: apic - rearrange parse_lapic_timer_c2_ok Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 5680bda62f78..885e306420ac 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1752,6 +1752,13 @@ static int __init setup_nolapic(char *arg) } early_param("nolapic", setup_nolapic); +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + static int __init parse_disable_apic_timer(char *arg) { disable_apic_timer = 1; @@ -1766,13 +1773,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v1.2.3 From e75bedf415f300a08e9bbcc755784e488574a73e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:03 +0400 Subject: x86: apic - lapic_resume 32bit - unification fix Just add parenthesis to be identical of current 64bit implementation (so diff will not complain). Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 885e306420ac..e975562a76a5 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1646,6 +1646,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1656,6 +1657,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From 1b4ee4e4096d430c4c12516c1d30a6b0b4f9e9e4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 23:12:33 +0400 Subject: x86: apic - compilation warnings fix Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 +++++++++------ arch/x86/kernel/apic_64.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e975562a76a5..b8d80c291650 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1162,11 +1162,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1426,6 +1428,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1443,7 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7a317180ba2e..37e037606f30 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1020,11 +1020,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1363,6 +1365,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1380,7 +1384,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); -- cgit v1.2.3 From 467cd0529a480c9c25f06154b788d1d1ba77828a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 18 Aug 2008 16:56:29 -0700 Subject: x86: early_printk.c trivial sparse fixes arch/x86/kernel/early_printk.c:404:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:404:13: expected restricted __le16 [assigned] [usertype] wValue arch/x86/kernel/early_printk.c:404:13: got int [signed] value arch/x86/kernel/early_printk.c:405:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:405:13: expected restricted __le16 [assigned] [usertype] wIndex arch/x86/kernel/early_printk.c:405:13: got int [signed] index arch/x86/kernel/early_printk.c:406:14: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:406:14: expected restricted __le16 [assigned] [usertype] wLength arch/x86/kernel/early_printk.c:406:14: got int [signed] size arch/x86/kernel/early_printk.c:845:16: warning: Using plain integer as NULL pointer arch/x86/kernel/early_printk.c:992:13: warning: symbol 'enable_debug_console' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 28155acf706e..825e9136b026 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -401,9 +401,9 @@ static int dbgp_control_msg(unsigned devnum, int requesttype, int request, /* Compute the control message */ req.bRequestType = requesttype; req.bRequest = request; - req.wValue = value; - req.wIndex = index; - req.wLength = size; + req.wValue = cpu_to_le16(value); + req.wIndex = cpu_to_le16(index); + req.wLength = cpu_to_le16(size); pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); addr = DBGP_EPADDR(devnum, 0); @@ -842,7 +842,7 @@ static int __init early_dbgp_init(char *s) ret = ehci_setup(); if (ret < 0) { dbgp_printk("ehci_setup failed\n"); - ehci_debug = 0; + ehci_debug = NULL; return -1; } @@ -989,7 +989,7 @@ static int __init setup_early_printk(char *buf) return 0; } -void __init enable_debug_console(char *buf) +static void __init enable_debug_console(char *buf) { #ifdef DBGP_DEBUG struct console *old_early_console = NULL; -- cgit v1.2.3 From 8a7c5ef3ba8cca8f9c4f91ddef3d081f517914ae Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 19 Aug 2008 02:13:55 +0200 Subject: x86 iommu: remove unneeded parenthesis The parenthesis in __iommu_queue_command() are not needed when assigning into 'target' variable. Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f2ede5..69b4d060b21c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -65,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u8 *target; tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = (iommu->cmd_buf + tail); + target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); -- cgit v1.2.3 From 7e00df5818964298c9821365a6cb7a8304227c5c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/feature_names.c | 3 ++- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..0785b3c8d043 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017e..c3afba5a81a7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..c9017799497c 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ -- cgit v1.2.3 From f1c5d30e1d79bbfb60eaf189db862d3cb2bcac92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..65a0c1b48696 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From f607e3a03c90e8c050cb0c12ec9967c2925cc812 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Aug 2008 13:34:59 -0700 Subject: Revert "[CPUFREQ][2/2] preregister support for powernow-k8" This reverts commit 34ae7f35a21694aa5cb8829dc5142c39d73d6ba0, which has been reported to cause a number of problems. During suspend and resume, it apparently causes a crash in a CPU hotplug notifier to happen, although the exact details are sketchy because of the inability to get good traces during the suspend sequence. See buzilla entries http://bugzilla.kernel.org/show_bug.cgi?id=11296 http://bugzilla.kernel.org/show_bug.cgi?id=11339 for more examples and details. [ Mark: "Revert the patch for now. I'm still looking into getting a reliable reproduction and I do not have a fix at this time." ] Requested-by: Rafael J. Wysocki Acked-by: Mark Langsdorf Acked-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 109 ++++++++++-------------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 3 +- 2 files changed, 37 insertions(+), 75 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4e7271999a74..84bb395038d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -737,63 +737,44 @@ static int find_psb_table(struct powernow_k8_data *data) #ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { - if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE)) + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; - data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK; -} - - -static struct acpi_processor_performance *acpi_perf_data; -static int preregister_valid; - -static int powernow_k8_cpu_preinit_acpi(void) -{ - acpi_perf_data = alloc_percpu(struct acpi_processor_performance); - if (!acpi_perf_data) - return -ENODEV; - - if (acpi_processor_preregister_performance(acpi_perf_data)) - return -ENODEV; - else - preregister_valid = 1; - return 0; + data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val; - int cpu = 0; - data->acpi_data = percpu_ptr(acpi_perf_data, cpu); - if (acpi_processor_register_performance(data->acpi_data, data->cpu)) { + if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); return -EIO; } /* verify the data contained in the ACPI structures */ - if (data->acpi_data->state_count <= 1) { + if (data->acpi_data.state_count <= 1) { dprintk("No ACPI P-States\n"); goto err_out; } - if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data->control_register.space_id, - data->acpi_data->status_register.space_id); + data->acpi_data.control_register.space_id, + data->acpi_data.status_register.space_id); goto err_out; } /* fill in data->powernow_table */ powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data->state_count + 1)), GFP_KERNEL); + * (data->acpi_data.state_count + 1)), GFP_KERNEL); if (!powernow_table) { dprintk("powernow_table memory alloc failure\n"); goto err_out; @@ -806,12 +787,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if (ret_val) goto err_out_mem; - powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data->state_count].index = 0; + powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; /* fill in data */ - data->numps = data->acpi_data->state_count; + data->numps = data->acpi_data.state_count; if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -819,31 +800,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - /* determine affinity, from ACPI if available */ - if (preregister_valid) { - if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) || - (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY)) - data->starting_core_affinity = data->acpi_data->shared_cpu_map; - else - data->starting_core_affinity = cpumask_of_cpu(data->cpu); - } else { - /* best guess from family if not */ - if (cpu_family == CPU_HW_PSTATE) - data->starting_core_affinity = cpumask_of_cpu(data->cpu); - else - data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu); - } - return 0; err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(data->acpi_data, data->cpu); + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data->state_count = 0; + data->acpi_data.state_count = 0; return -ENODEV; } @@ -855,10 +821,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; - for (i = 0; i < data->acpi_data->state_count; i++) { + for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; - index = data->acpi_data->states[i].control & HW_PSTATE_MASK; + index = data->acpi_data.states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); @@ -874,7 +840,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf powernow_table[i].index = index; - powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; + powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } return 0; } @@ -883,16 +849,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf { int i; int cntlofreq = 0; - for (i = 0; i < data->acpi_data->state_count; i++) { + for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; u32 vid; if (data->exttype) { - fid = data->acpi_data->states[i].status & EXT_FID_MASK; - vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK; + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; } else { - fid = data->acpi_data->states[i].control & FID_MASK; - vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK; + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); @@ -933,10 +899,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf cntlofreq = i; } - if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", powernow_table[i].frequency, - (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } @@ -946,12 +912,11 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { - if (data->acpi_data->state_count) - acpi_processor_unregister_performance(data->acpi_data, data->cpu); + if (data->acpi_data.state_count) + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); } #else -static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } @@ -1136,7 +1101,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; int rc; if (!cpu_online(pol->cpu)) @@ -1209,7 +1174,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* run on any CPU again */ set_cpus_allowed_ptr(current, &oldmask); - pol->cpus = data->starting_core_affinity; + if (cpu_family == CPU_HW_PSTATE) + pol->cpus = cpumask_of_cpu(pol->cpu); + else + pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1332,7 +1300,6 @@ static int __cpuinit powernowk8_init(void) } if (supported_cpus == num_online_cpus()) { - powernow_k8_cpu_preinit_acpi(); printk(KERN_INFO PFX "Found %d %s " "processors (%d cpu cores) (" VERSION ")\n", num_online_nodes(), @@ -1349,10 +1316,6 @@ static void __exit powernowk8_exit(void) dprintk("exit\n"); cpufreq_unregister_driver(&cpufreq_amd64_driver); - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI - free_percpu(acpi_perf_data); -#endif } MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index a62612cd4be8..ab48cfed4d96 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -33,13 +33,12 @@ struct powernow_k8_data { #ifdef CONFIG_X86_POWERNOW_K8_ACPI /* the acpi table needs to be kept. it's only available if ACPI was * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance *acpi_data; + struct acpi_processor_performance acpi_data; #endif /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ cpumask_t *available_cores; - cpumask_t starting_core_affinity; }; -- cgit v1.2.3 From c6744955d0ec0cb485c28c51eeb7185e260f6172 Mon Sep 17 00:00:00 2001 From: Samuel Sieb Date: Wed, 6 Aug 2008 22:06:29 -0700 Subject: x86: fix "kernel won't boot on a Cyrix MediaGXm (Geode)" Cyrix MediaGXm/Cx5530 Unicorn Revision 1.19.3B has stopped booting starting at v2.6.22. The reason is this commit: > commit f25f64ed5bd3c2932493681bdfdb483ea707da0a > Author: Juergen Beisert > Date: Sun Jul 22 11:12:38 2007 +0200 > > x86: Replace NSC/Cyrix specific chipset access macros by inlined functions. this commit activated a macro which was dormant before due to (buggy) macro side-effects. I've looked through various datasheets and found that the GXm and GXLV Geode processors don't have an incrementor. Remove the incrementor setup entirely. As the incrementor value differs according to clock speed and we would hope that the BIOS configures it correctly, it is probably the right solution. Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..e710a21bb6e8 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -134,23 +134,6 @@ static void __cpuinit set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - /* * Configure later MediaGX and/or Geode processor. */ @@ -174,7 +157,6 @@ static void __cpuinit geode_configure(void) set_cx86_memwb(); set_cx86_reorder(); - set_cx86_inc(); local_irq_restore(flags); } -- cgit v1.2.3 From 8343ef2437c599d30568e6b5a257a40bf2f4902b Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Wed, 20 Aug 2008 00:16:13 +0200 Subject: x86-microcode: fix unbalanced use of get_cpu() Don't use get_cpu() at all. Resort to checking a boot-up CPU (#0) in microcode_{intel,amd}_module_init(). Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 10 ++++++---- arch/x86/kernel/microcode_intel.c | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 33b2a217a8c5..a6e76ccf8158 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -500,13 +500,15 @@ static struct microcode_ops microcode_amd_ops = { static int __init microcode_amd_module_init(void) { - struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + struct cpuinfo_x86 *c = &cpu_data(0); equiv_cpu_table = NULL; - if (c->x86_vendor == X86_VENDOR_AMD) - return microcode_init(µcode_amd_ops, THIS_MODULE); - else + if (c->x86_vendor != X86_VENDOR_AMD) { + printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n"); return -ENODEV; + } + + return microcode_init(µcode_amd_ops, THIS_MODULE); } static void __exit microcode_amd_module_exit(void) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index d2d9d74f4cbb..6dd8907ff22e 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -531,12 +531,14 @@ static struct microcode_ops microcode_intel_ops = { static int __init microcode_intel_module_init(void) { - struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + struct cpuinfo_x86 *c = &cpu_data(0); - if (c->x86_vendor == X86_VENDOR_INTEL) - return microcode_init(µcode_intel_ops, THIS_MODULE); - else + if (c->x86_vendor != X86_VENDOR_INTEL) { + printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n"); return -ENODEV; + } + + return microcode_init(µcode_intel_ops, THIS_MODULE); } static void __exit microcode_intel_module_exit(void) -- cgit v1.2.3 From d45de40934897c6ee5b05141f7895bbb28512395 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Wed, 20 Aug 2008 00:22:26 +0200 Subject: x86-microcode: generic interface refactoring This is the 1st patch in the series. Here the aim was to avoid any significant changes, logically-wise. So it's mainly about generic interface refactoring: e.g. make microcode_{intel,amd}.c more about arch-specific details and less about policies like make-sure-we-run-on-a-target-cpu (no more set_cpus_allowed_ptr() here) and generic synchronization (no more microcode_mutex here). All in all, more line have been deleted than added. 4 files changed, 145 insertions(+), 198 deletions(-) Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 155 +++++++++++++++++++++++++------------- arch/x86/kernel/microcode_amd.c | 77 +++---------------- arch/x86/kernel/microcode_intel.c | 91 ++++------------------ 3 files changed, 129 insertions(+), 194 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index ad136ad99cb3..b2f84ce5eed3 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -/*#define DEBUG pr_debug */ +/* #define DEBUG pr_debug */ #include #include #include @@ -104,8 +104,7 @@ MODULE_LICENSE("GPL"); struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -DEFINE_MUTEX(microcode_mutex); -EXPORT_SYMBOL_GPL(microcode_mutex); +static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); @@ -234,22 +233,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); struct platform_device *microcode_pdev; EXPORT_SYMBOL_GPL(microcode_pdev); -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - microcode_ops->collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - microcode_ops->cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t sz) @@ -266,14 +249,15 @@ static ssize_t reload_store(struct sys_device *dev, cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = microcode_ops->cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); + if (cpu_online(cpu)) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + mutex_lock(µcode_mutex); + if (uci->valid) + err = microcode_ops->cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); + } put_online_cpus(); - set_cpus_allowed_ptr(current, &old); } if (err) return err; @@ -285,7 +269,7 @@ static ssize_t version_show(struct sys_device *dev, { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - return sprintf(buf, "0x%x\n", uci->rev); + return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); } static ssize_t pf_show(struct sys_device *dev, @@ -293,7 +277,7 @@ static ssize_t pf_show(struct sys_device *dev, { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - return sprintf(buf, "0x%x\n", uci->pf); + return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); } static SYSDEV_ATTR(reload, 0200, NULL, reload_store); @@ -312,7 +296,85 @@ static struct attribute_group mc_attr_group = { .name = "microcode", }; -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + microcode_ops->microcode_fini_cpu(cpu); + uci->valid = 0; + mutex_unlock(µcode_mutex); +} + +static void collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + memset(uci, 0, sizeof(*uci)); + if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) + uci->valid = 1; +} + +static void microcode_resume_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpu_signature nsig; + + pr_debug("microcode: CPU%d resumed\n", cpu); + + if (!uci->mc.valid_mc) + return; + + /* + * Let's verify that the 'cached' ucode does belong + * to this cpu (a bit of paranoia): + */ + if (microcode_ops->collect_cpu_info(cpu, &nsig)) { + microcode_fini_cpu(cpu); + return; + } + + if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { + microcode_fini_cpu(cpu); + /* Should we look for a new ucode here? */ + return; + } + + microcode_ops->apply_microcode(cpu); +} + +void microcode_update_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); + /* + * Check if the system resume is in progress (uci->valid != NULL), + * otherwise just request a firmware: + */ + if (uci->valid) { + microcode_resume_cpu(cpu); + } else { + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING) + microcode_ops->cpu_request_microcode(cpu); + } + mutex_unlock(µcode_mutex); +} + +static void microcode_init_cpu(int cpu) +{ + cpumask_t old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + microcode_update_cpu(cpu); + set_cpus_allowed_ptr(current, &old); +} + +static int mc_sysdev_add(struct sys_device *sys_dev) { int err, cpu = sys_dev->id; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -327,16 +389,10 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) if (err) return err; - microcode_init_cpu(cpu, resume); - + microcode_init_cpu(cpu); return 0; } -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - static int mc_sysdev_remove(struct sys_device *sys_dev) { int cpu = sys_dev->id; @@ -345,7 +401,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; pr_debug("microcode: CPU%d removed\n", cpu); - microcode_ops->microcode_fini_cpu(cpu); + microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } @@ -376,33 +432,26 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) sys_dev = get_cpu_sysdev(cpu); switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_ops->microcode_fini_cpu(cpu); - break; case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (microcode_ops->apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_ops->microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } + microcode_init_cpu(cpu); + case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: + pr_debug("microcode: CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) printk(KERN_ERR "microcode: Failed to create the sysfs " "group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + pr_debug("microcode: CPU%d removed\n", cpu); + break; + case CPU_DEAD: + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); break; } return NOTIFY_OK; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index a6e76ccf8158..4006e5e3adf0 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -59,38 +59,28 @@ MODULE_LICENSE("GPL v2"); /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -extern struct mutex (microcode_mutex); - struct equiv_cpu_entry *equiv_cpu_table; -extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info_amd(int cpu) +static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - uci->rev = 0; - uci->pf = 0; - uci->mc.mc_amd = NULL; - uci->valid = 1; + memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", cpu); - uci->valid = 0; - return; + return -1; } asm volatile("movl %1, %%ecx; rdmsr" - : "=a" (uci->rev) + : "=a" (csig->rev) : "i" (0x0000008B) : "ecx"); printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", - uci->rev); + csig->rev); + + return 0; } static int get_matching_microcode_amd(void *mc, int cpu) @@ -119,7 +109,7 @@ static int get_matching_microcode_amd(void *mc, int cpu) if (equiv_cpu_table == NULL) { printk(KERN_INFO "microcode: CPU%d microcode update with " "version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->rev); + cpu, mc_header->patch_id, uci->cpu_sig.rev); goto out; } @@ -185,12 +175,12 @@ static int get_matching_microcode_amd(void *mc, int cpu) pci_dev_put(sb_pci_dev); } - if (mc_header->patch_id <= uci->rev) + if (mc_header->patch_id <= uci->cpu_sig.rev) return 0; printk(KERN_INFO "microcode: CPU%d found a matching microcode " "update with version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->rev); + cpu, mc_header->patch_id, uci->cpu_sig.rev); out: new_mc = vmalloc(UCODE_MAX_SIZE); @@ -250,9 +240,9 @@ static void apply_microcode_amd(int cpu) printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x \n", - cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id); + cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id); - uci->rev = rev; + uci->cpu_sig.rev = rev; } #ifdef CONFIG_MICROCODE_OLD_INTERFACE @@ -437,61 +427,18 @@ static int cpu_request_microcode_amd(int cpu) return error; } -static int apply_microcode_check_cpu_amd(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - unsigned int rev; - cpumask_t old; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc.mc_amd) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16) - err = -EINVAL; - - if (!err) { - asm volatile("movl %1, %%ecx; rdmsr" - : "=a" (rev) - : "i" (0x0000008B) : "ecx"); - - if (uci->rev != rev) - err = -EINVAL; - } - - if (!err) - apply_microcode_amd(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " rev=0x%x\n", - cpu, uci->rev); - - set_cpus_allowed(current, old); - return err; -} - static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); - uci->valid = 0; vfree(uci->mc.mc_amd); uci->mc.mc_amd = NULL; - mutex_unlock(µcode_mutex); } static struct microcode_ops microcode_amd_ops = { .get_next_ucode = get_next_ucode_amd, .get_matching_microcode = get_matching_microcode_amd, .microcode_sanity_check = NULL, - .apply_microcode_check_cpu = apply_microcode_check_cpu_amd, .cpu_request_microcode = cpu_request_microcode_amd, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 6dd8907ff22e..c9b53202ba3d 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -122,46 +122,37 @@ MODULE_LICENSE("GPL"); /* serialize access to the physical write to MSR 0x79 */ static DEFINE_SPINLOCK(microcode_update_lock); -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -extern struct mutex microcode_mutex; - -extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) +static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc.mc_intel = NULL; - uci->valid = 1; + memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { printk(KERN_ERR "microcode: CPU%d not a capable Intel " "processor\n", cpu_num); - uci->valid = 0; - return; + return -1; } - uci->sig = cpuid_eax(0x00000001); + csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { /* get processor flags from MSR 0x17 */ rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); + csig->pf = 1 << ((val[1] >> 18) & 7); } wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ sync_core(); /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); + rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); + csig->sig, csig->pf, csig->rev); + + return 0; } static inline int microcode_update_match(int cpu_num, @@ -169,8 +160,8 @@ static inline int microcode_update_match(int cpu_num, { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) + if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf) + || mc_header->rev <= uci->cpu_sig.rev) return 0; return 1; } @@ -289,7 +280,7 @@ static int get_matching_microcode(void *mc, int cpu) find: pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, mc_header->rev, uci->rev); + cpu, mc_header->rev, uci->cpu_sig.rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -335,16 +326,16 @@ static void apply_microcode(int cpu) spin_unlock_irqrestore(µcode_update_lock, flags); if (val[1] != uci->mc.mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %04x-%02x-%02x \n", - cpu_num, uci->rev, val[1], + cpu_num, uci->cpu_sig.rev, val[1], uci->mc.mc_intel->hdr.date & 0xffff, uci->mc.mc_intel->hdr.date >> 24, (uci->mc.mc_intel->hdr.date >> 16) & 0xff); - uci->rev = val[1]; + uci->cpu_sig.rev = val[1]; } #ifdef CONFIG_MICROCODE_OLD_INTERFACE @@ -459,70 +450,18 @@ static int cpu_request_microcode(int cpu) return error; } -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc.mc_intel) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed_ptr(current, &old); - return err; -} - static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); - uci->valid = 0; vfree(uci->mc.mc_intel); uci->mc.mc_intel = NULL; - mutex_unlock(µcode_mutex); } static struct microcode_ops microcode_intel_ops = { .get_next_ucode = get_next_ucode, .get_matching_microcode = get_matching_microcode, .microcode_sanity_check = microcode_sanity_check, - .apply_microcode_check_cpu = apply_microcode_check_cpu, .cpu_request_microcode = cpu_request_microcode, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, -- cgit v1.2.3 From 99dd8713306a89f3e106143581244e550e00a644 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 19 Aug 2008 12:51:59 -0500 Subject: x86, SGI UV: hardcode the TLB flush interrupt system vector The UV TLB shootdown mechanism needs a system interrupt vector. Its vector had been hardcoded as 200, but needs to moved to the reserved system vector range so that it does not collide with some device vector. This is still temporary until dynamic system IRQ allocation is provided. But it will be needed when real UV hardware becomes available and runs 2.6.27. Signed-off-by: Cliff Wickman Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab0..8b8c0d6640fa 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) uv_init_blade(blade, node, cur_cpu); cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); return 0; -- cgit v1.2.3 From 63d3a75d6f1fcf2f33e6abbe84e1f428c3586152 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 19 Aug 2008 13:19:36 -0700 Subject: x86/paravirt: add spin_lock_flags lock op It is useful for a pv_lock_ops backend to know whether interrupts are enabled or not in the context a spin_lock is being called. This allows it to enable interrupts while spinning, which could be particularly helpful when spinning becomes blocking. The default implementation just calls the normal spin_lock op, ignoring the flags. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 38d7f7f1dbc9..206359a8e43f 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,12 +7,18 @@ #include +static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +{ + __raw_spin_lock(lock); +} + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .spin_is_locked = __ticket_spin_is_locked, .spin_is_contended = __ticket_spin_is_contended, .spin_lock = __ticket_spin_lock, + .spin_lock_flags = default_spin_lock_flags, .spin_trylock = __ticket_spin_trylock, .spin_unlock = __ticket_spin_unlock, #endif -- cgit v1.2.3 From 85d577979a8a94a32ca1d828a91c2eb8300bc9f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 18 Aug 2008 11:58:17 +0200 Subject: werror: fix pci calgary Fix an integer comparison always false warning in the PCI Calgary 64 driver. A u8 is being compared to something that's 512 by default, resulting in the following warning: arch/x86/kernel/pci-calgary_64.c:1285: warning: comparison is always false due to limited range of data type This was introduced by patch b34e90b8f0f30151349134f87b5dc6ef75a5218c. Signed-off-by: David Howells Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783ed7a8..363a74ec7a76 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1269,13 +1269,15 @@ static inline int __init determine_tce_table_size(u64 ram) static int __init build_detail_arrays(void) { unsigned long ptr; - int i, scal_detail_size, rio_detail_size; + unsigned numnodes, i; + int scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + numnodes = rio_table_hdr->num_scal_dev; + if (numnodes > MAX_NUMNODES){ printk(KERN_WARNING "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", - MAX_NUMNODES, rio_table_hdr->num_scal_dev); + MAX_NUMNODES, numnodes); return -ENODEV; } @@ -1296,8 +1298,7 @@ static int __init build_detail_arrays(void) } ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; - i++, ptr += scal_detail_size) + for (i = 0; i < numnodes; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; for (i = 0; i < rio_table_hdr->num_rio_dev; -- cgit v1.2.3 From bde78a79a6eb015f33aa4660df1b06f5135def20 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 8 Jul 2008 09:51:56 -0700 Subject: x86: use WARN() in arch/x86/kernel Use WARN() instead of a printk+WARN_ON() pair; this way the message becomes part of the warning section for better reporting/collection. This also allowed the folding of some if()'s into the WARN() Signed-off-by: Arjan van de Ven Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 5 +---- arch/x86/kernel/pci-calgary_64.c | 3 +-- arch/x86/kernel/tsc_sync.c | 6 ++---- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8faf..b117d7f8a564 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - if (!kvm_para_available()) { - printk(KERN_WARNING + WARN(!kvm_para_available(), KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); - WARN_ON(1); - } return 0; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 363a74ec7a76..dcdac6c826e9 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { - printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " + WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); - WARN_ON(1); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89b..9ffb01c31c40 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) __raw_spin_unlock(&sync_lock); } } - if (!(now-start)) { - printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", + WARN(!(now-start), + "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", now-start, end-start); - WARN_ON(1); - } } /* -- cgit v1.2.3 From 11494547b1754c4f3bd7f707ab869e2adf54d52f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 01:01:19 -0700 Subject: x86: fix apic version warning after following patch, commit 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Author: Cyrill Gorcunov Date: Mon Aug 18 20:45:57 2008 +0400 x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now we are getting warning for acpi path on 64 bit system. make the 64-bit side fill in apic_version[] as well. [ mingo@elte.hu: build fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 12e260e8fb2a..d9770a56511a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,10 +239,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) return; } -#ifdef CONFIG_X86_32 if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; -#endif generic_processor_info(id, ver); } @@ -762,10 +760,8 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = read_apic_id(); -#ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); -#endif } } -- cgit v1.2.3 From 7946612de2087e163308e26034286fc2dc9dacf1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 20 Aug 2008 11:31:07 -0700 Subject: x86: export pv_lock_ops non-GPL None of the spinlock API is exported GPL, so there's no reason for pv_lock_ops to be. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: drago01 --- arch/x86/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d798..300da17e61cb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); -- cgit v1.2.3 From b2a6a58ca6a3ddf4e278a53199c5b8fd39adbc0e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 20 Aug 2008 18:18:26 +0200 Subject: x86: fix BUG: unable to handle kernel paging request (numaq_tsc_disable) This section mismatch: >> Seems to be a section mismatch; init_intel() is __cpuinit while >> numaq_tsc_disable() is __init. Seems to be introduced in: >> >> commit 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a >> Author: Yinghai Lu >> Date: Sat Jul 19 18:01:16 2008 -0700 >> >> x86: extend and use x86_quirks to clean up NUMAQ code > > Oops, I am wrong about numaq_tsc_disable() being __init. Still, I > believe that Yinghai might be able to say what's really wrong :-) Would lead to this crash: BUG: unable to handle kernel paging request at c08a45f0 IP: [] numaq_tsc_disable+0x0/0x40 Fixed by the patch below. Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20a..eecc8c18f010 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) } -void __init numaq_tsc_disable(void) +void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) return; -- cgit v1.2.3 From 4e1d112cac08049e764fe3c4f6d3ec92529f9f68 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: arch/x86/kernel/apm_32.c: remove duplicated #include Removed duplicated include file in arch/x86/kernel/apm_32.c. Signed-off-by: Huang Weiyi Acked-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- arch/x86/kernel/apm_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b0..b93d069aea72 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,7 +228,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 671eef85a3e885dff4ce210d8774ad50a91d5967 Mon Sep 17 00:00:00 2001 From: "Cihula, Joseph" Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: x86, e820: add support for AddressRangeUnusuable ACPI memory type Add support for the E820_UNUSABLE memory type, which is defined in Revision 3.0b (Oct. 10, 2006) of the ACPI Specification on p. 394 Table 14-1: AddressRangeUnusuable This range of address contains memory in which errors have been detected. This range must not be used by the OSPM. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7bb..291e6cd9f9c0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; + case E820_UNUSABLE: + printk("(unusable)\n"); + break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } -- cgit v1.2.3 From 8323444b5dba3fe55e56a95d20d8f55c1d6745af Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 20 Aug 2008 16:45:53 -0700 Subject: x86: PAT Update validate_pat_support for intel CPUs Pentium III and Core Solo/Duo CPUs have an erratum " Page with PAT set to WC while associated MTRR is UC may consolidate to UC " which can result in WC setting in PAT to be ineffective. We will disable PAT on such CPUs, so that we can continue to use MTRR WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..a6ef672adbba 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) return; - break; + + pat_disable("PAT WC disabled due to known CPU erratum."); + return; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: -- cgit v1.2.3 From f86399396ce7a4f4069828b7dceac5aa5113dfb5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 30 Jul 2008 18:32:27 -0300 Subject: x86, paravirt_ops: use unsigned long instead of u32 for alloc_p*() pfn args This patch changes the pfn args from 'u32' to 'unsigned long' on alloc_p*() functions on paravirt_ops, and the corresponding implementations for Xen and VMI. The prototypes for CONFIG_PARAVIRT=n are already using unsigned long, so paravirt.h now matches the prototypes on asm-x86/pgalloc.h. It shouldn't result in any changes on generated code on 32-bit, with or without CONFIG_PARAVIRT. On both cases, 'codiff -f' didn't show any change after applying this patch. On 64-bit, there are (expected) binary changes only when CONFIG_PARAVIRT is enabled, as the patch is really supposed to change the size of the pfn args. [ v2: KVM_GUEST: use the right parameter type on kvm_release_pt() ] Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Acked-by: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/vmi_32.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2b..478bca986eca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) kvm_deferred_mmu_op(&ftlb, sizeof ftlb); } -static void kvm_release_pt(u32 pfn) +static void kvm_release_pt(unsigned long pfn) { struct kvm_mmu_op_release_pt rpt = { .header.op = KVM_MMU_OP_RELEASE_PT, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922d..340b03643b95 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) { /* * This call comes in very early, before mem_map is setup. @@ -409,20 +409,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pte(u32 pfn) +static void vmi_release_pte(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pmd(u32 pfn) +static void vmi_release_pmd(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); -- cgit v1.2.3 From 38cc1c3df77c1bb739a4766788eb9fa49f16ffdf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 20:24:24 -0700 Subject: x86: work around MTRR mask setting Joshua Hoblitt reported that only 3 GB of his 16 GB of RAM is usable. Booting with mtrr_show showed us the BIOS-initialized MTRR settings - which are all wrong. So the root cause is that the BIOS has not set the mask correctly: > [ 0.429971] MSR00000200: 00000000d0000000 > [ 0.433305] MSR00000201: 0000000ff0000800 > should be ==> [ 0.433305] MSR00000201: 0000003ff0000800 > > [ 0.436638] MSR00000202: 00000000e0000000 > [ 0.439971] MSR00000203: 0000000fe0000800 > should be ==> [ 0.439971] MSR00000203: 0000003fe0000800 > > [ 0.443304] MSR00000204: 0000000000000006 > [ 0.446637] MSR00000205: 0000000c00000800 > should be ==> [ 0.446637] MSR00000205: 0000003c00000800 > > [ 0.449970] MSR00000206: 0000000400000006 > [ 0.453303] MSR00000207: 0000000fe0000800 > should be ==> [ 0.453303] MSR00000207: 0000003fe0000800 > > [ 0.456636] MSR00000208: 0000000420000006 > [ 0.459970] MSR00000209: 0000000ff0000800 > should be ==> [ 0.459970] MSR00000209: 0000003ff0000800 So detect this borkage and add the prefix 111. Signed-off-by: Yinghai Lu Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eacd..43102e03e2d1 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; + /* Expand tmp with high bits to all 1s*/ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); + + if (tmp != mask_lo) { + WARN_ON("mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + mask_lo = tmp; + } + } /* This works correctly if size is a power of two, i.e. a contiguous range. */ -- cgit v1.2.3 From 5b792d320f28ff83dd4c13f984807e26235f7703 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 21 Aug 2008 13:43:51 -0700 Subject: x86, microcode_amd: fix shift warning microcode_amd.c uses ">> 32" on a 32-bit value, so gcc warns about that. The code could use something like this *untested* patch. linux-next-20080821/arch/x86/kernel/microcode_amd.c:229: warning: right shift count >= width of type Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 4006e5e3adf0..d606a05545ca 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -206,6 +206,7 @@ static void apply_microcode_amd(int cpu) unsigned int rev; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + unsigned long addr; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); @@ -215,10 +216,9 @@ static void apply_microcode_amd(int cpu) spin_lock_irqsave(µcode_update_lock, flags); - edx = (unsigned int)(((unsigned long) - &(uci->mc.mc_amd->hdr.data_code)) >> 32); - eax = (unsigned int)(((unsigned long) - &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL); + addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code; + edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); + eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); asm volatile("movl %0, %%ecx; wrmsr" : : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); -- cgit v1.2.3 From 3a6ddd5f18405ca92e004416af8ed44b9c9783d7 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Thu, 21 Aug 2008 11:32:26 -0700 Subject: x86: fix VMI for early params while fixing a different bug i moved the call to vmi_init before early params could be parsed. This broke the vmi specific commandline parameters. Fix that, by moving vmi initialization after kernel has got a chance to parse early parameters. Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a4656adab53b..362d4e7f2d38 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -604,14 +604,6 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); early_ioremap_init(); -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be before kernel pagetables are setup - * or fixmap area is touched. - */ - vmi_init(); -#endif - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); screen_info = boot_params.screen_info; edid_info = boot_params.edid_info; @@ -678,6 +670,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + /* after early param, so could get panic from serial */ reserve_early_setup_data(); -- cgit v1.2.3 From 94581094e774402a11887719bac10505236c2d51 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:39 +0200 Subject: x86: add alloc_coherent dma_ops callback to GART driver [ v2 - x86: make gart_alloc_coherent return zeroed memory FUJITA Tomonori pointed it out that the dma_alloc_coherent function should return memory set to zero. This patch adds this to the GART implementation too. ] Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d5..076e64b2d4f3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -499,6 +499,26 @@ error: return 0; } +/* allocate and map a coherent mapping */ +static void * +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag) +{ + void *vaddr; + + vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); + if (!vaddr) + return NULL; + + *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + if (*dma_addr != bad_dma_address) + return vaddr; + + free_pages((unsigned long)vaddr, get_order(size)); + + return NULL; +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -701,6 +721,7 @@ static struct dma_mapping_ops gart_dma_ops = { .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, + .alloc_coherent = gart_alloc_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From 43a5a5a09b8cfd56047706cf904718d073ccfd33 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:40 +0200 Subject: x86: add free_coherent dma_ops callback to GART driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 076e64b2d4f3..ef753e233580 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -519,6 +519,15 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, return NULL; } +/* free a coherent mapping */ +static void +gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, get_order(size)); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -722,6 +731,7 @@ static struct dma_mapping_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From e4ad68b651f22fa71820c0b30bb2807999b2b49f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:41 +0200 Subject: x86: add free_coherent dma_ops callback to Calgary IOMMU driver Signed-off-by: Joerg Roedel Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783ed7a8..afb020fdb19c 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -511,8 +511,22 @@ error: return ret; } +static void calgary_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + unsigned int npages; + struct iommu_table *tbl = find_iommu_table(dev); + + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + + iommu_free(tbl, dma_handle, npages); + free_pages((unsigned long)vaddr, get_order(size)); +} + static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, + .free_coherent = calgary_free_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, .map_sg = calgary_map_sg, -- cgit v1.2.3 From c5e835f9641e9fcb95d1afb24906821e98b2c6a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:42 +0200 Subject: x86: add alloc_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3e..b8ce83c98211 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,7 +72,62 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +static void * +nommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + unsigned long dma_mask; + int node; + struct page *page; + + if (hwdev->dma_mask == NULL) + return NULL; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + gfp |= __GFP_ZERO; + + dma_mask = hwdev->coherent_dma_mask; + if (!dma_mask) + dma_mask = *(hwdev->dma_mask); + + if (dma_mask < DMA_24BIT_MASK) + return NULL; + + node = dev_to_node(hwdev); + +#ifdef CONFIG_X86_64 + if (dma_mask <= DMA_32BIT_MASK) + gfp |= GFP_DMA32; +#endif + + /* No alloc-free penalty for ISA devices */ + if (dma_mask == DMA_24BIT_MASK) + gfp |= GFP_DMA; + +again: + page = alloc_pages_node(node, gfp, get_order(size)); + if (!page) + return NULL; + + if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + free_pages((unsigned long)page_address(page), get_order(size)); + gfp |= GFP_DMA; + goto again; + } + + *dma_addr = page_to_phys(page); + if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + flush_write_buffers(); + return page_address(page); + } + + free_pages((unsigned long)page_address(page), get_order(size)); + + return NULL; +} + struct dma_mapping_ops nommu_dma_ops = { + .alloc_coherent = nommu_alloc_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From a3a76532e0caa093c279806d8fe8608232538af0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:43 +0200 Subject: x86: add free_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b8ce83c98211..73853d3fdcac 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -126,8 +126,15 @@ again: return NULL; } +static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + struct dma_mapping_ops nommu_dma_ops = { .alloc_coherent = nommu_alloc_coherent, + .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From c647c3bb2d16246a87f49035985ddb7c1eb030df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:44 +0200 Subject: x86: cleanup dma_*_coherent functions All dma_ops implementations support the alloc_coherent and free_coherent callbacks now. This allows a big simplification of the dma_alloc_coherent function which is done with this patch. The dma_free_coherent functions is also cleaned up and calls now the free_coherent callback of the dma_ops implementation. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 121 +++++----------------------------------------- 1 file changed, 12 insertions(+), 109 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec2..613332b26e31 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -241,33 +241,15 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* Allocate DMA memory on node near device */ -static noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - /* * Allocate memory for a coherent mapping. */ -void * + void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory = NULL; - struct page *page; - unsigned long dma_mask = 0; - dma_addr_t bus; - int noretry = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + void *memory; if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; @@ -276,89 +258,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, dev = &fallback_dev; gfp |= GFP_DMA; } - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ - if (gfp & __GFP_DMA) - noretry = 1; - -#ifdef CONFIG_X86_64 - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp |= GFP_DMA32; - if (dma_mask < DMA_32BIT_MASK) - noretry = 1; - } -#endif - again: - page = dma_alloc_pages(dev, - noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (ops->map_simple) { - *dma_handle = ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)memory, get_order(size)); + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); @@ -368,17 +271,17 @@ EXPORT_SYMBOL(dma_alloc_coherent); * The caller must ensure that the device has finished accessing the mapping. */ void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) + void *vaddr, dma_addr_t bus) { struct dma_mapping_ops *ops = get_dma_ops(dev); - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_from_coherent(dev, order, vaddr)) + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; - if (ops->unmap_single) - ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 49 +++---------------------------------------- arch/x86/kernel/pci-gart_64.c | 4 ++-- 2 files changed, 5 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 613332b26e31..0a1408abcc62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to older i386. */ -struct device fallback_dev = { +struct device x86_dma_fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; +EXPORT_SYMBOL(x86_dma_fallback_dev); int dma_set_mask(struct device *dev, u64 mask) { @@ -241,50 +242,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* - * Allocate memory for a coherent mapping. - */ - void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) { - dev = &fallback_dev; - gfp |= GFP_DMA; - } - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); -} -EXPORT_SYMBOL(dma_free_coherent); - static int __init pci_iommu_init(void) { calgary_iommu_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ef753e233580..e81df25dc067 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -276,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) unsigned long bus; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; if (!need_iommu(dev, paddr, size)) return paddr; @@ -427,7 +427,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) return 0; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; out = 0; start = 0; -- cgit v1.2.3 From 2cd54961caff9fe9109807c6603a0af0729b9591 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:46 +0200 Subject: x86, AMD IOMMU: remove obsolete FIXME comment Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f2ede5..d15081c38237 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1038,8 +1038,6 @@ out: /* * The exported free_coherent function for dma_ops. - * FIXME: fix the generic x86 DMA layer so that it actually calls that - * function. */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) -- cgit v1.2.3 From 766af9fa812f49feb4a3e62cf92f3d37f33c7fb6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 00:12:09 +0900 Subject: dma-mapping.h, x86: remove last user of dma_mapping_ops->map_simple pci-dma.c doesn't use map_simple hook any more so we can remove it from struct dma_mapping_ops now. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e81df25dc067..cfd7ec25e37e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -720,7 +720,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, - .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, -- cgit v1.2.3 From 421076e2bec1b917bdcf83da35b24f6349bf35db Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 16:29:10 +0900 Subject: x86: dma_*_coherent rework patchset v2, fix alloc_coherent dma_ops callback was added to GART, however, it doesn't return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines. This patch fixes it. This patch also removes unused gart_map_simple (dma_mapping_ops->map_simple has gone). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 338c4f241559..4d0864900b8c 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -261,20 +261,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t -gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) -{ - dma_addr_t map; - unsigned long align_mask; - - align_mask = (1UL << get_order(size)) - 1; - map = dma_map_area(dev, paddr, size, dir, align_mask); - - flush_gart(); - - return map; -} - /* Map a single area into the IOMMU */ static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) @@ -512,12 +498,21 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + unsigned long align_mask; vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; - *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + align_mask = (1UL << get_order(size)) - 1; + + if (!dev) + dev = &x86_dma_fallback_dev; + + *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + align_mask); + flush_gart(); + if (*dma_addr != bad_dma_address) return vaddr; -- cgit v1.2.3 From b05f78f5c713eda2c34e495d92495ee4f1c3b5e1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Aug 2008 01:32:50 -0700 Subject: x86_64: printout msr -v2 commandline show_msr=1 for bsp, show_msr=32 for all 32 cpus. [ mingo@elte.hu: added documentation ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 51 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 1 + 2 files changed, 52 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017e..bca2d6980e82 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -394,6 +394,49 @@ static __init int setup_noclflush(char *arg) } __setup("noclflush", setup_noclflush); +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) @@ -403,6 +446,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d798..c6044682e1e7 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, -- cgit v1.2.3 From c4bd1fdab0deec0f69aeabab22075cb22ac8ad44 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Thu, 21 Aug 2008 20:49:05 +0200 Subject: x86: fix section mismatch warning - uv_cpu_init WARNING: vmlinux.o(.cpuinit.text+0x3cc4): Section mismatch in reference from the function uv_cpu_init() to the function .init.text:uv_system_init() The function __cpuinit uv_cpu_init() references a function __init uv_system_init(). If uv_system_init is only used by uv_cpu_init then annotate uv_system_init with a matching annotation. uv_system_init was ment to be called only once, so do it from codepath (native_smp_prepare_cpus) which is called once, right before activation of other cpus (smp_init). Note: old code relied on uv_node_to_blade being initialized to 0, but it'a not initialized from anywhere. Signed-off-by: Marcin Slusarz Acked-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 8 +++++--- arch/x86/kernel/smpboot.c | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2d7e307c7779..bfa837cb16be 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -293,7 +293,9 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static __init void uv_system_init(void) +static bool uv_system_inited; + +void __init uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; union uvh_node_id_u node_id; @@ -383,6 +385,7 @@ static __init void uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); + uv_system_inited = true; } /* @@ -391,8 +394,7 @@ static __init void uv_system_init(void) */ void __cpuinit uv_cpu_init(void) { - if (!uv_node_to_blade) - uv_system_init(); + BUG_ON(!uv_system_inited); uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e139e617f422..7985c5b3f916 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1221,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); + + if (is_uv_system()) + uv_system_init(); out: preempt_enable(); } -- cgit v1.2.3 From 9754a5b840a209bc1f192d59f63e81b698a55ac8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 Aug 2008 08:22:23 +0200 Subject: x86: work around MTRR mask setting, v2 improve the debug printout: - make it actually display something - print it only once would be nice to have a WARN_ONCE() facility, to feed such things to kerneloops.org. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 43102e03e2d1..cb7d3b6a80eb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,7 +401,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - WARN_ON("mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + static int once = 1; + + if (once) { + printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + once = 0; + } mask_lo = tmp; } } -- cgit v1.2.3 From 25258ef762bc4a05fa9c4523f7dae56e3fd01864 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 20 Aug 2008 11:31:07 -0700 Subject: x86: export pv_lock_ops non-GPL None of the spinlock API is exported GPL, so there's no reason for pv_lock_ops to be. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: drago01 --- arch/x86/kernel/paravirt-spinlocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 206359a8e43f..0e9f1982b1dd 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -23,7 +23,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); void __init paravirt_use_bytelocks(void) { -- cgit v1.2.3 From bbb65d2d365efe9951290e61678dcf81ec60add4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:10 +0200 Subject: x86: use cpuid vector 0xb when available for detecting cpu topology cpuid leaf 0xb provides extended topology enumeration. This interface provides the 32-bit x2APIC id of the logical processor and it also provides a new mechanism to detect SMT and core siblings (which provides increased addressability). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 86 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 3 ++ arch/x86/kernel/cpu/intel.c | 13 +++-- arch/x86/kernel/cpu/intel_64.c | 5 +- 4 files changed, 103 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..aa9641ae6703 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,6 +7,8 @@ #include #include +#include + struct cpuid_bit { u16 feature; u8 reg; @@ -48,6 +50,90 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } +/* leaf 0xb SMT level */ +#define SMT_LEVEL 0 + +/* leaf 0xb sub-leaf types */ +#define INVALID_TYPE 0 +#define SMT_TYPE 1 +#define CORE_TYPE 2 + +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) +#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (c->cpuid_level < 0xb) + return; + + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + /* + * check if the cpuid leaf 0xb is actually implemented. + */ + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + return; + + set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + + /* + * initial apic id, which also represents 32-bit extended x2apic id. + */ + c->initial_apicid = edx; + + /* + * Populate HT related information from sub-leaf level 0. + */ + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + + /* + * Check for the Core type in the implemented sub leaves. + */ + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +#ifdef CONFIG_X86_32 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; + c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); +#else + c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; + c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + return; +} + #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index af569a964e74..a2888c7b56ac 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -128,6 +128,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d76..58a6f1a0b297 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -176,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - c->x86_max_cores = num_cpu_cores(c); - - detect_ht(c); + detect_extended_topology(c); + + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = num_cpu_cores(c); + detect_ht(c); + } /* Work around errata */ Intel_errata_workarounds(c); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f0..42d501a8c418 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -80,7 +80,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } -- cgit v1.2.3 From e17941b0c140562d92e5a3bc12b4ad88281c7926 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery use x2apic id reported by cpuid during topology discovery, instead of the apic id configured in the APIC. For most of the systems, x2apic id reported by cpuid leaf 0xb will be same as the physical apic id reported by the APIC_ID register of the APIC. We follow the suggested guidelines and use the apic id reported by the cpuid. No change to non-generic UV platforms, will use the apic id reported in the APIC_ID register as the cpuid reported apic id's may not be unique. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 7 +------ arch/x86/kernel/genx2apic_phys.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50a..0bf0a8624b8c 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -120,14 +120,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd4..e2401ab0c46e 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -118,14 +118,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } void x2apic_send_IPI_self(int vector) -- cgit v1.2.3 From 8735728ef8dc935c4fb351f913758fdbb62c308d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 22 Aug 2008 22:23:09 +0200 Subject: x86 MCE: Fix CPU hotplug problem with multiple multicore AMD CPUs During CPU hot-remove the sysfs directory created by threshold_create_bank(), defined in arch/x86/kernel/cpu/mcheck/mce_amd_64.c, has to be removed before its parent directory, created by mce_create_device(), defined in arch/x86/kernel/cpu/mcheck/mce_64.c . Moreover, when the CPU in question is hotplugged again, obviously the latter has to be created before the former. At present, the right ordering is not enforced, because all of these operations are carried out by CPU hotplug notifiers which are not appropriately ordered with respect to each other. This leads to serious problems on systems with two or more multicore AMD CPUs, among other things during suspend and hibernation. Fix the problem by placing threshold bank CPU hotplug callbacks in mce_cpu_callback(), so that they are invoked at the right places, if defined. Additionally, use kobject_del() to remove the sysfs directory associated with the kobject created by kobject_create_and_add() in threshold_create_bank(), to prevent the kernel from crashing during CPU hotplug operations on systems with two or more multicore AMD CPUs. This patch fixes bug #11337. Signed-off-by: Rafael J. Wysocki Acked-by: Andi Kleen Tested-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_64.c | 5 +++++ arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 18 +++++------------- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 65a339678ece..726a5fcdf341 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = { }; DEFINE_PER_CPU(struct sys_device, device_mce); +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: mce_create_device(cpu); + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 88736cadbaa6..5eb390a4b2e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: + kobject_del(b->kobj); kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; @@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) } /* get notified when a cpu comes on/off */ -static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, + unsigned int cpu) { - /* cpu was unsigned int to begin with */ - unsigned int cpu = (unsigned long)hcpu; - if (cpu >= NR_CPUS) - goto out; + return; switch (action) { case CPU_ONLINE: @@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, default: break; } - out: - return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier __cpuinitdata = { - .notifier_call = threshold_cpu_callback, -}; - static __init int threshold_init_device(void) { unsigned lcpu = 0; @@ -684,7 +676,7 @@ static __init int threshold_init_device(void) if (err) return err; } - register_hotcpu_notifier(&threshold_cpu_notifier); + threshold_cpu_callback = amd_64_threshold_cpu_callback; return 0; } -- cgit v1.2.3 From 060700b571717c997a2ea5e2049b848fa248ee13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2008 11:52:06 -0700 Subject: x86: do not enable TSC notifier if we don't need it Impact: crash on non-TSC-equipped CPUs Don't enable the TSC notifier if we *either*: 1. don't have a CPU, or 2. have a CPU with constant TSC. In either of those cases, the notifier is either damaging (1) or useless(2). From: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/kernel/tsc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46af71676738..9bed5cae4bdc 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; -- cgit v1.2.3 From 93be71b672f167b1e8c23725114f86305354f0ac Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:11 +0100 Subject: x86: add cpu hotplug hooks into smp_ops Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smp.c | 6 +++++- arch/x86/kernel/smpboot.c | 8 ++++---- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3b7a1ddcc0bc..e382fe0ccd66 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -91,7 +91,7 @@ static void cpu_exit_clear(void) } /* We don't actually take CPU down, just spin without interrupts. */ -static inline void play_dead(void) +void native_play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); @@ -107,7 +107,7 @@ static inline void play_dead(void) wbinvd_halt(); } #else -static inline void play_dead(void) +void native_play_dead(void) { BUG(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71553b664e2a..dfd3f5752085 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -90,7 +90,7 @@ DECLARE_PER_CPU(int, cpu_state); #include /* We halt the CPU with physical CPU hotplug */ -static inline void play_dead(void) +void native_play_dead(void) { idle_task_exit(); mb(); @@ -102,7 +102,7 @@ static inline void play_dead(void) wbinvd_halt(); } #else -static inline void play_dead(void) +void native_play_dead(void) { BUG(); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 361b7a4c640c..18f9b19f5f8f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, - .cpu_up = native_cpu_up, .smp_cpus_done = native_smp_cpus_done, .smp_send_stop = native_smp_send_stop, .smp_send_reschedule = native_smp_send_reschedule, + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, + .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f916..c414cee296ba 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1346,7 +1346,7 @@ static void __ref remove_cpu_from_maps(int cpu) numa_remove_cpu(cpu); } -int __cpu_disable(void) +int native_cpu_disable(void) { int cpu = smp_processor_id(); @@ -1385,7 +1385,7 @@ int __cpu_disable(void) return 0; } -void __cpu_die(unsigned int cpu) +void native_cpu_die(unsigned int cpu) { /* We don't do anything here: idle task is faking death itself. */ unsigned int i; @@ -1403,12 +1403,12 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } #else /* ... !CONFIG_HOTPLUG_CPU */ -int __cpu_disable(void) +int native_cpu_disable(void) { return -ENOSYS; } -void __cpu_die(unsigned int cpu) +void native_cpu_die(unsigned int cpu) { /* We said "no" in __cpu_disable */ BUG(); -- cgit v1.2.3 From 379002586368ae22916f668011c9118c8ce8189c Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:12 +0100 Subject: x86_32: clean up play_dead The removal of the CPU from the various maps was redundant as it already happened in cpu_disable. After cleaning this up, cpu_uninit only resets the tlb state, so rename it and create a noop version for the X86_64 case (so the two play_deads can be unified later). Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/process_32.c | 17 ++++------------- 2 files changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..18f5551b32dd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -714,14 +714,10 @@ void __cpuinit cpu_init(void) mxcsr_feature_mask_init(); } -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) +void reset_lazy_tlbstate(void) { int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - /* lazy TLB state */ per_cpu(cpu_tlbstate, cpu).state = 0; per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } -#endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e382fe0ccd66..d55eb49584b1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -75,26 +75,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk) #ifdef CONFIG_HOTPLUG_CPU #include -static void cpu_exit_clear(void) +/* We don't actually take CPU down, just spin without interrupts. */ +void native_play_dead(void) { int cpu = raw_smp_processor_id(); idle_task_exit(); - cpu_uninit(); - irq_ctx_exit(cpu); + reset_lazy_tlbstate(); - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - - numa_remove_cpu(cpu); -} + irq_ctx_exit(cpu); -/* We don't actually take CPU down, just spin without interrupts. */ -void native_play_dead(void) -{ - /* This must be done before dead CPU ack */ - cpu_exit_clear(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; -- cgit v1.2.3 From a21f5d88c17a40941f6239d1959d89e8493e8e01 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:13 +0100 Subject: x86: unify x86_32 and x86_64 play_dead into one function Add the new play_dead into smpboot.c, as it fits more cleanly in there alongside other CONFIG_HOTPLUG functions. Separate out the common code into its own function. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 32 -------------------------------- arch/x86/kernel/process_64.c | 23 ----------------------- arch/x86/kernel/smpboot.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 55 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d55eb49584b1..633cf06578fa 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -72,38 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -#ifdef CONFIG_HOTPLUG_CPU -#include - -/* We don't actually take CPU down, just spin without interrupts. */ -void native_play_dead(void) -{ - int cpu = raw_smp_processor_id(); - - idle_task_exit(); - - reset_lazy_tlbstate(); - - irq_ctx_exit(cpu); - - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - /* - * With physical CPU hotplug, we should halt the cpu - */ - local_irq_disable(); - /* mask all interrupts, flush any and all caches, and halt */ - wbinvd_halt(); -} -#else -void native_play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dfd3f5752085..aa28ed01cdf3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -85,29 +85,6 @@ void exit_idle(void) __exit_idle(); } -#ifdef CONFIG_HOTPLUG_CPU -DECLARE_PER_CPU(int, cpu_state); - -#include -/* We halt the CPU with physical CPU hotplug */ -void native_play_dead(void) -{ - idle_task_exit(); - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - local_irq_disable(); - /* mask all interrupts, flush any and all caches, and halt */ - wbinvd_halt(); -} -#else -void native_play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c414cee296ba..28b4287296aa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1402,6 +1402,29 @@ void native_cpu_die(unsigned int cpu) } printk(KERN_ERR "CPU %u didn't die...\n", cpu); } + +void play_dead_common(void) +{ + idle_task_exit(); + reset_lazy_tlbstate(); + irq_ctx_exit(raw_smp_processor_id()); + + mb(); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* + * With physical CPU hotplug, we should halt the cpu + */ + local_irq_disable(); +} + +void native_play_dead(void) +{ + play_dead_common(); + wbinvd_halt(); +} + #else /* ... !CONFIG_HOTPLUG_CPU */ int native_cpu_disable(void) { @@ -1413,4 +1436,10 @@ void native_cpu_die(unsigned int cpu) /* We said "no" in __cpu_disable */ BUG(); } + +void native_play_dead(void) +{ + BUG(); +} + #endif -- cgit v1.2.3 From 8227dce7dc2cfdcc28ee0eadfb482a7ee77fba03 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:14 +0100 Subject: x86: separate generic cpu disabling code from APIC writes in cpu_disable It allows paravirt implementations of cpu_disable to share the cpu_disable_common code, without having to take on board APIC writes, which may not be appropriate. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 28b4287296aa..66b04e598817 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1346,25 +1346,9 @@ static void __ref remove_cpu_from_maps(int cpu) numa_remove_cpu(cpu); } -int native_cpu_disable(void) +void cpu_disable_common(void) { int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); - clear_local_APIC(); - /* * HACK: * Allow any queued timer interrupts to get serviced @@ -1382,6 +1366,28 @@ int native_cpu_disable(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(cpu_online_map); +} + +int native_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); + clear_local_APIC(); + + cpu_disable_common(); return 0; } -- cgit v1.2.3 From c7ffa6c26277b403920e2255d10df849bd613380 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 25 Aug 2008 13:11:27 +0300 Subject: x86: default to reboot via ACPI Triple-fault and keyboard reset may assert INIT instead of RESET; however INIT is blocked when Intel VT is enabled. This leads to a partially reset machine when invoking emergency_restart via sysrq-b: the processor is still working but other parts of the system are dead. Default to rebooting via ACPI, which correctly asserts RESET and reboots the machine. This is safe since we will fall back to keyboard reset and triple fault if acpi is not enabled or if the reset is not successful. Signed-off-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb9..f4c93f1cfc19 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -enum reboot_type reboot_type = BOOT_KBD; +/* + * Keyboard reset and triple fault may result in INIT, not RESET, which + * doesn't work when we're in vmx root mode. Try ACPI first. + */ +enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.3 From 52a8968ce95da8469ba0a9b3e4010fe31caf77a3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2008 13:35:06 +0200 Subject: x86: fix cpufreq + sched_clock() regression I noticed that my sched_clock() was slow on a number of machine, so I started looking at cpufreq. The below seems to fix the problem for me. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9bed5cae4bdc..8e786b0d665a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); return 0; } -- cgit v1.2.3 From c6f31932d0a1d2b13952f506ebc92675e2d8df80 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 Aug 2008 17:27:21 -0700 Subject: x86: msr: propagate errors from smp_call_function_single() Propagate error (-ENXIO) from smp_call_function_single(). These errors can happen when a CPU is unplugged while the MSR driver is open. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/msr.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index e43938086885..9c34a1005dba 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -79,8 +79,11 @@ static ssize_t msr_read(struct file *file, char __user *buf, for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) - return -EIO; + if (err) { + if (err == -EFAULT) /* Fix idiotic error code */ + err = -EIO; + return err; + } if (copy_to_user(tmp, &data, 8)) return -EFAULT; tmp += 2; @@ -105,8 +108,11 @@ static ssize_t msr_write(struct file *file, const char __user *buf, if (copy_from_user(&data, tmp, 8)) return -EFAULT; err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) - return -EIO; + if (err) { + if (err == -EFAULT) /* Fix idiotic error code */ + err = -EIO; + return err; + } tmp += 2; } -- cgit v1.2.3 From 4b46ca701bdcdc19fcf32823f9fcabf8236e4e78 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 Aug 2008 17:28:20 -0700 Subject: x86: cpuid: propagate error from smp_call_function_single() Propagate error (-ENXIO) from smp_call_function_single() in the CPUID driver. This can happen when a CPU is unplugged while the CPUID driver is open. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3be31c..23e8316c8357 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -89,6 +89,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, struct cpuid_regs cmd; int cpu = iminor(file->f_path.dentry->d_inode); u64 pos = *ppos; + int err; if (count % 16) return -EINVAL; /* Invalid chunk size */ @@ -96,7 +97,9 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, for (; count; count -= 16) { cmd.eax = pos; cmd.ecx = pos >> 32; - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); + err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); + if (err) + return err; if (copy_to_user(tmp, &cmd, 16)) return -EFAULT; tmp += 16; -- cgit v1.2.3 From 85f1cb60157e06d9e8996b02fad9ba6964523d75 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 Aug 2008 17:34:27 -0700 Subject: x86: msr: correct return value on partial operations Return the correct return value when the MSR driver partially completes a request (we should return the number of bytes actually read or written, instead of the error code.) Signed-off-by: H. Peter Anvin --- arch/x86/kernel/msr.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9c34a1005dba..2e2af5d18191 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -72,7 +72,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, u32 data[2]; u32 reg = *ppos; int cpu = iminor(file->f_path.dentry->d_inode); - int err; + int err = 0; + ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ @@ -82,14 +83,17 @@ static ssize_t msr_read(struct file *file, char __user *buf, if (err) { if (err == -EFAULT) /* Fix idiotic error code */ err = -EIO; - return err; + break; + } + if (copy_to_user(tmp, &data, 8)) { + err = -EFAULT; + break; } - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; tmp += 2; + bytes += 8; } - return ((char __user *)tmp) - buf; + return bytes ? bytes : err; } static ssize_t msr_write(struct file *file, const char __user *buf, @@ -99,24 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf, u32 data[2]; u32 reg = *ppos; int cpu = iminor(file->f_path.dentry->d_inode); - int err; + int err = 0; + ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; + if (copy_from_user(&data, tmp, 8)) { + err = -EFAULT; + break; + } err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); if (err) { if (err == -EFAULT) /* Fix idiotic error code */ err = -EIO; - return err; + break; } tmp += 2; + bytes += 8; } - return ((char __user *)tmp) - buf; + return bytes ? bytes : err; } static int msr_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 9ea2b82ed6265a31f9a84886d74d8a2ef01b27c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 Aug 2008 17:35:25 -0700 Subject: x86: cpuid: correct return value on partial operations Return the correct return value when the CPUID driver partially completes a request (we should return the number of bytes actually read or written, instead of the error code.) Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 23e8316c8357..8e9cd6a8ec12 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -89,7 +89,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, struct cpuid_regs cmd; int cpu = iminor(file->f_path.dentry->d_inode); u64 pos = *ppos; - int err; + ssize_t bytes = 0; + int err = 0; if (count % 16) return -EINVAL; /* Invalid chunk size */ @@ -99,14 +100,17 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, cmd.ecx = pos >> 32; err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); if (err) - return err; - if (copy_to_user(tmp, &cmd, 16)) - return -EFAULT; + break; + if (copy_to_user(tmp, &cmd, 16)) { + err = -EFAULT; + break; + } tmp += 16; + bytes += 16; *ppos = ++pos; } - return tmp - buf; + return bytes ? bytes : err; } static int cpuid_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From a81726087428b541aa64604b8a94104a4d4aa8f9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 26 Aug 2008 15:13:45 -0700 Subject: x86: acpi: move acpi_mcfg_64bit_base_addr into CONFIG_PCI_MMCONFIG acpi_mcfg_64bit_base_addr is used when CONFIG_PCI_MMCONFIG is enabled. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f20470823d38..e5032d7b391d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -96,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -159,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) { if (!strcmp(mcfg->header.oem_id, "SGI")) -- cgit v1.2.3 From 11c231a962c740b3216eb6565149ae5a7944cba7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery, fix v2: Fix for !SMP build Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index aa9641ae6703..a2d1a545767d 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,6 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; @@ -132,6 +133,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); return; +#endif } #ifdef CONFIG_X86_PAT -- cgit v1.2.3 From 7414aa41a63348c3bc72d8c37b716024c29b6d50 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 17:56:44 -0700 Subject: x86: generate names for /proc/cpuinfo from We have had a number of cases where (and its predecessors) have diverged substantially from the names list in /proc/cpuinfo. This patch generates the latter from the former. It retains the option for explicitly overriding the strings, but by making that require a separate action it should at least be less likely to happen. It would be good to do a future pass and rename strings that are gratuituously different in the kernel (/proc/cpuinfo is a userspace interface and must remain constant.) Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/Makefile | 11 ++++- arch/x86/kernel/cpu/feature_names.c | 84 ------------------------------------- arch/x86/kernel/cpu/mkcapflags.pl | 32 ++++++++++++++ arch/x86/kernel/cpu/powerflags.c | 20 +++++++++ 4 files changed, 62 insertions(+), 85 deletions(-) delete mode 100644 arch/x86/kernel/cpu/feature_names.c create mode 100644 arch/x86/kernel/cpu/mkcapflags.pl create mode 100644 arch/x86/kernel/cpu/powerflags.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0fc6c144384..3ede19a4e0b2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,7 +3,7 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o feature_names.o +obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o @@ -23,3 +23,12 @@ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + +cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h + +targets += capflags.c +$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE + $(call if_changed,mkcapflags) diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c deleted file mode 100644 index c9017799497c..000000000000 --- a/arch/x86/kernel/cpu/feature_names.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Strings for the various x86 capability flags. - * - * This file must not contain any executable code. - */ - -#include - -/* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ -const char * const x86_cap_flags[NCAPINTS*32] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, - "nopl", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -}; - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ -}; diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl new file mode 100644 index 000000000000..dfea390e1608 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h +# + +($in, $out) = @ARGV; + +open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; +open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; + +print OUT "#include \n\n"; +print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; + +while (defined($line = )) { + if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { + $macro = $1; + $feature = $2; + $tail = $3; + if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { + $feature = $1; + } + + if ($feature ne '') { + printf OUT "\t%-32s = \"%s\",\n", + "[$macro]", "\L$feature"; + } + } +} +print OUT "};\n"; + +close(IN); +close(OUT); diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c new file mode 100644 index 000000000000..5abbea297e0c --- /dev/null +++ b/arch/x86/kernel/cpu/powerflags.c @@ -0,0 +1,20 @@ +/* + * Strings for the various x86 power flags + * + * This file must not contain any executable code. + */ + +#include + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; -- cgit v1.2.3 From 2c7e9fd4c6cb7f4b0bc7162e9a30847e51a1ca1b Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Wed, 27 Aug 2008 10:35:06 -0400 Subject: x86: make poll_idle behave more like the other idle methods Make poll_idle() behave more like the other idle methods. Currently, poll_idle() returns immediately. The other idle methods all wait indefinately for some condition to come true before returning. poll_idle should emulate these other methods and also wait for a return condition, in this case, for need_resched() to become 'true'. Without this delay the idle loop spends all of its time in the outer loop that calls poll_idle. This outer loop, these days, does real work, some of it under rcu locks. That work should only be done when idle is entered and when idle exits, not continuously while idle is spinning. Signed-off-by: Joe Korty Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..4e09d26748cf 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -185,7 +185,8 @@ static void mwait_idle(void) static void poll_idle(void) { local_irq_enable(); - cpu_relax(); + while (!need_resched()) + cpu_relax(); } /* -- cgit v1.2.3 From fbb16e243887332dd5754e48ffe5b963378f3cd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Sep 2008 00:54:47 +0200 Subject: [x86] Fix TSC calibration issues Larry Finger reported at http://lkml.org/lkml/2008/9/1/90: An ancient laptop of mine started throwing errors from b43legacy when I started using 2.6.27 on it. This has been bisected to commit bfc0f59 "x86: merge tsc calibration". The unification of the TSC code adopted mostly the 64bit code, which prefers PMTIMER/HPET over the PIT calibration. Larrys system has an AMD K6 CPU. Such systems are known to have PMTIMER incarnations which run at double speed. This results in a miscalibration of the TSC by factor 0.5. So the resulting calibrated CPU/TSC speed is half of the real CPU speed, which means that the TSC based delay loop will run half the time it should run. That might explain why the b43legacy driver went berserk. On the other hand we know about systems, where the PIT based calibration results in random crap due to heavy SMI/SMM disturbance. On those systems the PMTIMER/HPET based calibration logic with SMI detection shows better results. According to Alok also virtualized systems suffer from the PIT calibration method. The solution is to use a more wreckage aware aproach than the current either/or decision. 1) reimplement the retry loop which was dropped from the 32bit code during the merge. It repeats the calibration and selects the lowest frequency value as this is probably the closest estimate to the real frequency 2) Monitor the delta of the TSC values in the delay loop which waits for the PIT counter to reach zero. If the maximum value is significantly different from the minimum, then we have a pretty safe indicator that the loop was disturbed by an SMI. 3) keep the pmtimer/hpet reference as a backup solution for systems where the SMI disturbance is a permanent point of failure for PIT based calibration 4) do the loop iteration for both methods, record the lowest value and decide after all iterations finished. 5) Set a clear preference to PIT based calibration when the result makes sense. The implementation does the reference calibration based on HPET/PMTIMER around the delay, which is necessary for the PIT anyway, but keeps separate TSC values to ensure the "independency" of the resulting calibration values. Tested on various 32bit/64bit machines including Geode 266Mhz, AMD K6 (affected machine with a double speed pmtimer which I grabbed out of the dump), Pentium class machines and AMD/Intel 64 bit boxen. Bisected-by: Larry Finger Signed-off-by: Thomas Gleixner Tested-by: Larry Finger Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 235 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 181 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8e786b0d665a..ac79bd143da8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -127,75 +127,202 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) */ unsigned long native_calibrate_tsc(void) { - unsigned long flags; - u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); - unsigned int tsc_khz_val = 0; + u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; + unsigned long flags, tscmin, tscmax; + int hpet = is_hpet_enabled(), pitcnt, i; - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); + /* + * Run 5 calibration loops to get the lowest frequency value + * (the best estimate). We use two different calibration modes + * here: + * + * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and + * load a timeout of 50ms. We read the time right after we + * started the timer and wait until the PIT count down reaches + * zero. In each wait loop iteration we read the TSC and check + * the delta to the previous read. We keep track of the min + * and max values of that delta. The delta is mostly defined + * by the IO time of the PIT access, so we can detect when a + * SMI/SMM disturbance happend between the two reads. If the + * maximum time is significantly larger than the minimum time, + * then we discard the result and have another try. + * + * 2) Reference counter. If available we use the HPET or the + * PMTIMER as a reference to check the sanity of that value. + * We use separate TSC readouts and check inside of the + * reference read for a SMI/SMM disturbance. We dicard + * disturbed values here as well. We do that around the PIT + * calibration delay loop as we have to wait for a certain + * amount of time anyway. + */ + for (i = 0; i < 5; i++) { + + tscmin = ULONG_MAX; + tscmax = 0; + pitcnt = 0; + + local_irq_save(flags); + + /* + * Read the start value and the reference count of + * hpet/pmtimer when available: + */ + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + * + * Some devices need a delay here. + */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = tr1 = tr2 = get_cycles(); + + while ((inb(0x61) & 0x20) == 0) { + tr2 = get_cycles(); + delta = tr2 - tsc; + tsc = tr2; + if ((unsigned int) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned int) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * We waited at least 50ms above. Now read + * pmtimer/hpet reference again + */ + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt > 5000 && tscmax < 10 * tscmin) { + + /* Calculate the PIT value */ + delta = tr2 - tr1; + do_div(delta, 50); + + /* We take the smallest value into account */ + tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); + } + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) + continue; + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) + continue; + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + } /* - * Preset the result with the raw and inaccurate PIT - * calibration value + * Now check the results. */ - delta = (tr2 - tr1); - do_div(delta, 50); - tsc_khz_val = delta; + if (tsc_pit_min == ULONG_MAX) { + /* PIT gave no useful value */ + printk(KERN_WARNING "TSC: PIT calibration failed due to " + "SMI disturbance.\n"); + + /* We don't have an alternative source, disable TSC */ + if (!hpet && !pm1 && !pm2) { + printk("TSC: No reference (HPET/PMTIMER) available\n"); + return 0; + } + + /* The alternative source failed as well, disable TSC */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " + "failed due to SMI disturbance.\n"); + return 0; + } + + /* Use the alternative source */ + printk(KERN_INFO "TSC: using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); + + return tsc_ref_min; + } - /* hpet or pmtimer available ? */ + /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; + /* The alternative source failed, use the PIT calibration value */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " + "to SMI disturbance. Using PIT calibration\n"); + return tsc_pit_min; } - tsc2 = (tsc2 - tsc1) * 1000000LL; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 5% window, the we + * use the lower frequency of those as it is probably the + * closest estimate. + */ + if (delta >= 95 && delta <= 105) { + printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", + hpet ? "HPET" : "PMTIMER"); + printk(KERN_INFO "TSC: using %s calibration value\n", + tsc_pit_min <= tsc_ref_min ? "PIT" : + hpet ? "HPET" : "PMTIMER"); + return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; } - do_div(tsc2, tsc1); - tsc_khz_val = tsc2; + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -out: - return tsc_khz_val; + /* + * The calibration values differ too much. In doubt, we use + * the PIT value as we know that there are PMTIMERs around + * running at double speed. + */ + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) -- cgit v1.2.3 From ec0c15afb41fd9ad45b53468b60db50170e22346 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Sep 2008 07:30:13 -0700 Subject: Split up PIT part of TSC calibration from native_calibrate_tsc The TSC calibration function is still very complicated, but this makes it at least a little bit less so by moving the PIT part out into a helper function of its own. Tested-by: Larry Finger Signed-of-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 132 +++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ac79bd143da8..346cae5ac423 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,15 +122,75 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Try to calibrate the TSC against the Programmable + * Interrupt Timer and return the frequency of the TSC + * in kHz. + * + * Return ULONG_MAX on failure to calibrate. + */ +static unsigned long pit_calibrate_tsc(void) +{ + u64 tsc, t1, t2, delta; + unsigned long tscmin, tscmax; + int pitcnt; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + */ + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + + tsc = t1 = t2 = get_cycles(); + + pitcnt = 0; + tscmax = 0; + tscmin = ULONG_MAX; + while ((inb(0x61) & 0x20) == 0) { + t2 = get_cycles(); + delta = t2 - tsc; + tsc = t2; + if ((unsigned long) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned long) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } + + /* + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. + */ + if (pitcnt < 5000 || tscmax > 10 * tscmin) + return ULONG_MAX; + + /* Calculate the PIT value */ + delta = t2 - t1; + do_div(delta, 50); + return delta; +} + + /** * native_calibrate_tsc - calibrate the tsc on boot */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, tr1, tr2, tsc, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, tscmin, tscmax; - int hpet = is_hpet_enabled(), pitcnt, i; + unsigned long flags; + int hpet = is_hpet_enabled(), i; /* * Run 5 calibration loops to get the lowest frequency value @@ -157,72 +217,22 @@ unsigned long native_calibrate_tsc(void) * amount of time anyway. */ for (i = 0; i < 5; i++) { - - tscmin = ULONG_MAX; - tscmax = 0; - pitcnt = 0; - - local_irq_save(flags); + unsigned long tsc_pit_khz; /* * Read the start value and the reference count of - * hpet/pmtimer when available: + * hpet/pmtimer when available. Then do the PIT + * calibration, which will take at least 50ms, and + * read the end value. */ + local_irq_save(flags); tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Setup CTC channel 2* for mode 0, (interrupt on terminal - * count mode), binary count. Set the latch register to 50ms - * (LSB then MSB) to begin countdown. - * - * Some devices need a delay here. - */ - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - - tsc = tr1 = tr2 = get_cycles(); - - while ((inb(0x61) & 0x20) == 0) { - tr2 = get_cycles(); - delta = tr2 - tsc; - tsc = tr2; - if ((unsigned int) delta < tscmin) - tscmin = (unsigned int) delta; - if ((unsigned int) delta > tscmax) - tscmax = (unsigned int) delta; - pitcnt++; - } - - /* - * We waited at least 50ms above. Now read - * pmtimer/hpet reference again - */ + tsc_pit_khz = pit_calibrate_tsc(); tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - local_irq_restore(flags); - /* - * Sanity checks: - * - * If we were not able to read the PIT more than 5000 - * times, then we have been hit by a massive SMI - * - * If the maximum is 10 times larger than the minimum, - * then we got hit by an SMI as well. - */ - if (pitcnt > 5000 && tscmax < 10 * tscmin) { - - /* Calculate the PIT value */ - delta = tr2 - tr1; - do_div(delta, 50); - - /* We take the smallest value into account */ - tsc_pit_min = min(tsc_pit_min, (unsigned long) delta); - } + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ if (!hpet && !pm1 && !pm2) -- cgit v1.2.3 From e6a5652fd156a286faadbf7a4062b5354d4e346e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Wed, 3 Sep 2008 19:33:14 -0400 Subject: x86: add io delay quirk for Presario F700 Manually adding "io_delay=0xed" fixes system lockups in ioapic mode on this machine. System Information Manufacturer: Hewlett-Packard Product Name: Presario F700 (KA695EA#ABF) Base Board Information Manufacturer: Quanta Product Name: 30D3 Reference: https://bugzilla.redhat.com/show_bug.cgi?id=459546 Signed-off-by: Chuck Ebbert Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_delay.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 1c3a66a67f83..720d2607aacb 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "30BF") } }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "Presario F700", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30D3") + } + }, { } }; -- cgit v1.2.3 From de014d617636d6a6bd5aef3b3d1f7f9a35669057 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Wed, 3 Sep 2008 18:18:01 -0700 Subject: x86: Change warning message in TSC calibration. When calibration against PIT fails, the warning that we print is misleading. In a virtualized environment the VM may get descheduled while calibration or, the check in PIT calibration may fail due to other virtualization overheads. The warning message explicitly assumes that calibration failed due to SMI's which may not be the case. Change that to something proper. Signed-off-by: Alok N Kataria Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac423..8f98e9de1b82 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -267,8 +267,7 @@ unsigned long native_calibrate_tsc(void) */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: PIT calibration failed due to " - "SMI disturbance.\n"); + printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !pm1 && !pm2) { -- cgit v1.2.3 From 1625324d22409e32e3f8eb86018cad72e1c09d61 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 Aug 2008 23:01:16 -0700 Subject: x86: move dir es7000 to es7000_32.c to be aligned with numaq, summit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/es7000_32.c | 345 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 arch/x86/kernel/es7000_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f3312..dac24c4390dd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c new file mode 100644 index 000000000000..849e5cd485b8 --- /dev/null +++ b/arch/x86/kernel/es7000_32.c @@ -0,0 +1,345 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +/* + * ES7000 Globals + */ + +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; + +int es7000_plat; + +/* + * GSI override for ES7000 platforms. + */ + +static unsigned int base; + +static int +es7000_rename_gsi(int ioapic, int gsi) +{ + if (es7000_plat == ES7000_ZORRO) + return gsi; + + if (!base) { + int i; + for (i = 0; i < nr_ioapics; i++) + base += nr_ioapic_registers[i]; + } + + if (!ioapic && (gsi < 16)) + gsi += base; + return gsi; +} + +void __init +setup_unisys(void) +{ + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = ES7000_ZORRO; + else + es7000_plat = ES7000_CLASSIC; + ioapic_renumber_irq = es7000_rename_gsi; +} + +/* + * Parse the OEM Table + */ + +int __init +parse_unisys_oem (char *oemptr) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= 6; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_port = MIP_PORT(mi->mip_info); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + tp += size; + } + + if (success < 2) { + es7000_plat = NON_UNISYS; + } else + setup_unisys(); + return es7000_plat; +} + +#ifdef CONFIG_ACPI +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr) +{ + struct acpi_table_header *header = NULL; + int i = 0; + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { + struct oem_table *t = (struct oem_table *)header; + *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, + t->OEMTableSize); + return 0; + } + } + return -1; +} +#endif + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + (unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000ULL) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +void __init +es7000_sw_apic(void) +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} -- cgit v1.2.3 From cce3e057242d3d46fea07b9eb3910b0076419be5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:44 +0000 Subject: x86: TSC: define the PIT latch value separate Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac423..aa11413e7c1d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,10 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +#define CAL_MS 50 +#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) +#define CAL_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC @@ -144,8 +148,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + outb(CAL_LATCH & 0xff, 0x42); + outb(CAL_LATCH >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -166,18 +170,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than 5000 + * If we were not able to read the PIT more than PIT_MIN_LOOPS * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < 5000 || tscmax > 10 * tscmin) + if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, 50); + do_div(delta, CAL_MS); return delta; } -- cgit v1.2.3 From d683ef7afe8b6dbac6a3c681cef8a908357793ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:48 +0000 Subject: x86: TSC: separate hpet/pmtimer calculation out Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 56 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index aa11413e7c1d..ebb9bf824a07 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,43 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Calculate the TSC frequency from HPET reference + */ +static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) +{ + u64 tmp; + + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tmp, 1000000); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +/* + * Calculate the TSC frequency from PMTimer reference + */ +static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) +{ + u64 tmp; + + if (!pm1 && !pm2) + return ULONG_MAX; + + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tmp = pm2 * 1000000000LL; + do_div(tmp, PMTMR_TICKS_PER_SEC); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + #define CAL_MS 50 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) #define CAL_PIT_LOOPS 5000 @@ -247,22 +284,11 @@ unsigned long native_calibrate_tsc(void) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; + if (hpet) + tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + else + tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); - if (hpet) { - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); - } - - do_div(tsc2, tsc1); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } -- cgit v1.2.3 From 827014be05e4515fa0dfc32e3100c4dab2070a98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:53 +0000 Subject: x86: TSC: use one set of reference variables Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ebb9bf824a07..52284d31fc9c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; int i; @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else - *pm = acpi_pm_read_early(); + *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < SMI_TRESHOLD) return t2; @@ -228,7 +228,7 @@ static unsigned long pit_calibrate_tsc(void) */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags; int hpet = is_hpet_enabled(), i; @@ -267,16 +267,16 @@ unsigned long native_calibrate_tsc(void) * read the end value. */ local_irq_save(flags); - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) + if (!hpet && !ref1 && !ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -285,9 +285,9 @@ unsigned long native_calibrate_tsc(void) tsc2 = (tsc2 - tsc1) * 1000000LL; if (hpet) - tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + tsc2 = calc_hpet_ref(tsc2, ref1, ref2); else - tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); + tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } @@ -301,7 +301,7 @@ unsigned long native_calibrate_tsc(void) "SMI disturbance.\n"); /* We don't have an alternative source, disable TSC */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk("TSC: No reference (HPET/PMTIMER) available\n"); return 0; } @@ -321,7 +321,7 @@ unsigned long native_calibrate_tsc(void) } /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From a977c400957451f3bd92b9ed6022f5fe8a6cbbf5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:59 +0000 Subject: x86: TSC make the calibration loop smarter The last changes made the calibration loop 250ms long which is far too much. Try to do that more clever. Experiments have shown that using a 10ms delay for the PIT based calibration gives us a good enough value. If we have a reference (HPET/PMTIMER) and the result of the PIT and the reference is close enough, then we can break out of the calibration loop on a match right away and use the reference value. Otherwise we just loop 3 times and decide then, which value to take. One caveat is that for virtualized environments the PIT calibration often does not work at all and I found out that 10us is a bit too short as well for the reference to give a sane result. The solution here is to make the last loop longer when the first two PIT calibrations failed. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 95 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 52284d31fc9c..da033b5b3e19 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -159,9 +159,14 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) return (unsigned long) deltatsc; } -#define CAL_MS 50 +#define CAL_MS 10 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) -#define CAL_PIT_LOOPS 5000 +#define CAL_PIT_LOOPS 1000 + +#define CAL2_MS 50 +#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) +#define CAL2_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable @@ -170,7 +175,7 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) * * Return ULONG_MAX on failure to calibrate. */ -static unsigned long pit_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; @@ -185,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb(CAL_LATCH & 0xff, 0x42); - outb(CAL_LATCH >> 8, 0x42); + outb(latch & 0xff, 0x42); + outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -207,18 +212,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than PIT_MIN_LOOPS + * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) + if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, CAL_MS); + do_div(delta, ms); return delta; } @@ -230,8 +235,8 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags; - int hpet = is_hpet_enabled(), i; + unsigned long flags, latch, ms; + int hpet = is_hpet_enabled(), i, loopmin; /* * Run 5 calibration loops to get the lowest frequency value @@ -257,7 +262,13 @@ unsigned long native_calibrate_tsc(void) * calibration delay loop as we have to wait for a certain * amount of time anyway. */ - for (i = 0; i < 5; i++) { + + /* Preset PIT loop values */ + latch = CAL_LATCH; + ms = CAL_MS; + loopmin = CAL_PIT_LOOPS; + + for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* @@ -268,7 +279,7 @@ unsigned long native_calibrate_tsc(void) */ local_irq_save(flags); tsc1 = tsc_read_refs(&ref1, hpet); - tsc_pit_khz = pit_calibrate_tsc(); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); @@ -290,6 +301,35 @@ unsigned long native_calibrate_tsc(void) tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 10% window + * then we can be sure, that the calibration + * succeeded. We break out of the loop right away. We + * use the reference value, as it is more precise. + */ + if (delta >= 90 && delta <= 110) { + printk(KERN_INFO + "TSC: PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); + return tsc_ref_min; + } + + /* + * Check whether PIT failed more than once. This + * happens in virtualized environments. We need to + * give the virtual PC a slightly longer timeframe for + * the HPET/PMTIMER to make the result precise. + */ + if (i == 1 && tsc_pit_min == ULONG_MAX) { + latch = CAL2_LATCH; + ms = CAL2_MS; + loopmin = CAL2_PIT_LOOPS; + } } /* @@ -309,7 +349,7 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed due to SMI disturbance.\n"); + "failed.\n"); return 0; } @@ -328,37 +368,18 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " - "to SMI disturbance. Using PIT calibration\n"); + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " + "Using PIT calibration\n"); return tsc_pit_min; } - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 5% window, the we - * use the lower frequency of those as it is probably the - * closest estimate. - */ - if (delta >= 95 && delta <= 105) { - printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", - hpet ? "HPET" : "PMTIMER"); - printk(KERN_INFO "TSC: using %s calibration value\n", - tsc_pit_min <= tsc_ref_min ? "PIT" : - hpet ? "HPET" : "PMTIMER"); - return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; - } - - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around - * running at double speed. + * running at double speed. At least we let the user know: */ + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From 58f7c98850a226d3fb05b1095af9f7c4ea3507ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Aug 2008 13:52:25 -0700 Subject: x86: split e820 reserved entries record to late v2 so could let BAR res register at first, or even pnp. v2: insert e820 reserve resources before pnp_system_init Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 291e6cd9f9c0..523d6c5605d1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,13 +1271,15 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ +struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - struct resource *res; u64 end; + struct resource *res; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1291,7 +1293,8 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + insert_resource(&iomem_resource, res); res++; } @@ -1303,6 +1306,19 @@ void __init e820_reserve_resources(void) } } +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + insert_resource(&iomem_resource, res); + res++; + } +} + char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; -- cgit v1.2.3 From a5444d15b611cf2ffe2bc52aaf11f2ac51882f89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Aug 2008 08:09:23 +0200 Subject: x86: split e820 reserved entries record to late v4 this one replaces: | commit a2bd7274b47124d2fc4dfdb8c0591f545ba749dd | Author: Yinghai Lu | Date: Mon Aug 25 00:56:08 2008 -0700 | | x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against BAR, v3 v2: insert e820 reserve resources before pnp_system_init v3: fix merging problem in tip/x86/core v4: address Linus's review about comments and condition in _late() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 523d6c5605d1..a7a71339bfb0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,12 +1271,12 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ -struct resource __initdata *e820_res; +static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - u64 end; struct resource *res; + u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); e820_res = res; @@ -1293,6 +1293,12 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) insert_resource(&iomem_resource, res); res++; @@ -1313,7 +1319,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + if (!res->parent && res->end) insert_resource(&iomem_resource, res); res++; } -- cgit v1.2.3 From dc44e65943169de2d1a1b494876f48a65a9737f1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Sep 2008 13:47:38 +0200 Subject: x86: capitalize function call interrupts consistently Impact: aestetic Capitalize function call interrupts consistently. All other descriptions in /proc/interrupts are capitalized except for "function call interrupts". Capitalize it too for consistency. While that's technically a published ABI I think the risk of anyone relying on that text to stay the same is negligible. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1cf8c1fcc088..b71e02d42f4f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -325,7 +325,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b238d8d2..f065fe9071b9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -129,7 +129,7 @@ skip: seq_printf(p, "CAL: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); -- cgit v1.2.3 From fac8f1e4f99dff7a0c3a929f327d66f46de6fa21 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:59:22 +0200 Subject: x86: split e820 reserved entries record to late, v7 try to insert_resource second time, by expanding the resource... for case: e820 reserved entry is partially overlapped with bar res... hope it will never happen Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a7a71339bfb0..e24d1bc47b46 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1320,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - insert_resource(&iomem_resource, res); + reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); res++; } } -- cgit v1.2.3 From 5fef55fddb7317585cabc1eae38dbd57f1c59729 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc3031..18514ed26104 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..a0534c04d38a 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ada50505a5c8..13f8fa16b815 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; -- cgit v1.2.3 From 5031088dbc0cd30e893eb27d53f0e0eee6eb1c00 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d043..8aab8517642e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* -- cgit v1.2.3 From 3da99c97763703b23cbf2bd6e96252256d4e4617 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: make (early)_identify_cpu more the same between 32bit and 64 bit 1. add extended_cpuid_level for 32bit 2. add generic_identify for 64bit 3. add early_identify_cpu for 32bit 4. early_identify_cpu not be called by identify_cpu 5. remove early in get_cpu_vendor for 32bit 6. add get_cpu_cap 7. add cpu_detect for 64bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 +++++++++++++++------------------------ arch/x86/kernel/cpu/common_64.c | 139 +++++++++++++++++++++++++--------------- 2 files changed, 141 insertions(+), 136 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642e..96e1b8698d3a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -96,7 +96,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) unsigned int *v; char *p, *q; - if (cpuid_eax(0x80000000) < 0x80000004) + if (c->extended_cpuid_level < 0x80000004) return 0; v = (unsigned int *) c->x86_model_id; @@ -125,7 +125,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; - n = cpuid_eax(0x80000000); + n = c->extended_cpuid_level; if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); @@ -186,7 +186,7 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -198,8 +198,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; + this_cpu = cpu_devs[i]; return; } } @@ -284,34 +283,30 @@ void __init cpu_detect(struct cpuinfo_x86 *c) } } } -static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - unsigned int ebx; + u32 ebx; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (have_cpuid_p()) { - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } - } - } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -321,25 +316,29 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; if (!have_cpuid_p()) return; + c->extended_cpuid_level = 0; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + cpu_detect(c); - get_cpu_vendor(c, 1); + get_cpu_vendor(c); - early_get_cap(c); + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); } /* @@ -373,60 +372,32 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - unsigned int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - c->initial_apicid = (ebx >> 24) & 0xFF; + if (!have_cpuid_p()) + return; + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; #else - c->apicid = c->initial_apicid; + c->apicid = c->initial_apicid; #endif - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ - init_scattered_cpuid_features(c); - detect_nopl(c); - } + init_scattered_cpuid_features(c); + detect_nopl(c); } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -651,13 +622,10 @@ void __init early_cpu_init(void) { struct cpu_vendor_dev *cvdev; - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_detect(); - validate_pat_support(&boot_cpu_data); + early_identify_cpu(&boot_cpu_data); } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 699ecbfb63ec..28719fe07941 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -195,6 +195,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) printk(KERN_ERR "CPU: Your system may be unstable.\n"); } c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; } static void __init early_cpu_support_print(void) @@ -249,56 +250,18 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) } } -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -void __init early_cpu_init(void) +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_support_print(); - early_identify_cpu(&boot_cpu_data); -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; @@ -306,17 +269,32 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + if (cap0 & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } +} + + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + u32 ebx; + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -325,8 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[6] = cpuid_ecx(0x80000001); } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ } /* Transmeta-defined flags: level 0x80860001 */ @@ -346,8 +322,26 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +} - detect_nopl(c); +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __init early_identify_cpu(struct cpuinfo_x86 *c) +{ + + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) @@ -356,6 +350,39 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) +{ + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ + + init_scattered_cpuid_features(c); + detect_nopl(c); +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -363,9 +390,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; - early_identify_cpu(c); + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); - init_scattered_cpuid_features(c); + generic_identify(c); c->apicid = phys_pkg_id(0); -- cgit v1.2.3 From 9d31d35b5f9d619bb2482235cc889326de049e29 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: order functions in cpu/common.c and cpu/common_64.c v2 v2: make 64 bit get c->x86_cache_alignment = c->x86_clfush_size Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 190 ++++++++++++++++++++++------------------ arch/x86/kernel/cpu/common_64.c | 106 +++++++++++----------- 2 files changed, 156 insertions(+), 140 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96e1b8698d3a..10e89ae5a600 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -60,6 +60,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -123,15 +135,15 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ecx, edx, l2size; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24)+(edx>>24); + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -185,6 +197,51 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { @@ -258,7 +315,26 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -void __init cpu_detect(struct cpuinfo_x86 *c) +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -267,19 +343,20 @@ void __init cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; + /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; + c->x86_model += ((tfms >> 16) & 0xf) << 4; if (cap0 & (1<<19)) { - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; } } } @@ -341,6 +418,17 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + /* * The NOPL instruction is supposed to exist on all CPUs with * family >= 6, unfortunately, that's not true in practice because @@ -500,7 +588,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0 ; i < NCAPINTS ; i++) + for (i = 0; i < NCAPINTS; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -529,52 +617,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -592,17 +634,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); + printk(KERN_CONT "%s ", vendor); - if (!c->x86_model_id[0]) - printk("%d86", c->x86); + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); else - printk("%s", c->x86_model_id); + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); + printk(KERN_CONT " stepping %02x\n", c->x86_mask); else - printk("\n"); + printk(KERN_CONT "\n"); } static __init int setup_disablecpuid(char *arg) @@ -618,16 +660,6 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -void __init early_cpu_init(void) -{ - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - - early_identify_cpu(&boot_cpu_data); -} - /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -636,18 +668,6 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) return regs; } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 28719fe07941..522a5f2e405d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,9 +103,8 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; @@ -143,8 +142,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); smp_num_siblings = 1; return; } @@ -182,7 +181,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (cpu_devs[i]) { if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { + !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; this_cpu = cpu_devs[i]; return; @@ -217,39 +216,6 @@ static void __init early_cpu_support_print(void) } } -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -258,6 +224,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); + c->x86 = 4; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -268,12 +235,11 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (cap0 & (1<<19)) + c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; + c->x86_cache_alignment = c->x86_clflush_size; + } } } @@ -283,9 +249,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) u32 tfms, xlvl; u32 ebx; - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; @@ -361,6 +324,39 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; @@ -448,7 +444,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) } -void __cpuinit identify_boot_cpu(void) +void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); } @@ -460,13 +456,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - struct msr_range { unsigned min; unsigned max; @@ -510,6 +499,13 @@ static __init int setup_show_msr(char *arg) } __setup("show_msr=", setup_show_msr); +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) -- cgit v1.2.3 From 10a434fcb23a57c385177a0086955fae01003f64 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:45 +0200 Subject: x86: remove cpu_vendor_dev 1. add c_x86_vendor into cpu_dev 2. change cpu_devs to static 3. check c_x86_vendor before put that cpu_dev into array 4. remove alignment for 64bit 5. order the sequence in cpu_devs according to link sequence... so could put intel at first, then amd... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 4 +-- arch/x86/kernel/cpu/amd.c | 3 +- arch/x86/kernel/cpu/amd_64.c | 4 +-- arch/x86/kernel/cpu/centaur.c | 3 +- arch/x86/kernel/cpu/centaur_64.c | 3 +- arch/x86/kernel/cpu/common.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/cpu.h | 18 ++++------ arch/x86/kernel/cpu/cyrix.c | 6 ++-- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/cpu/intel_64.c | 3 +- arch/x86/kernel/cpu/transmeta.c | 3 +- arch/x86/kernel/cpu/umc.c | 3 +- arch/x86/kernel/vmlinux_32.lds.S | 8 ++--- arch/x86/kernel/vmlinux_64.lds.S | 9 +++-- 15 files changed, 106 insertions(+), 106 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3ede19a4e0b2..403e689df0b8 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,14 +8,14 @@ obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 18514ed26104..d64ea6097ca7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -298,6 +298,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_early_init = early_init_amd, .c_init = init_amd, .c_size_cache = amd_size_cache, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1692b2a41ff..d1c721c0c49f 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -218,7 +218,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_ident = { "AuthenticAMD" }, .c_early_init = early_init_amd, .c_init = init_amd, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); - +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index a0534c04d38a..e5f6d89521bf 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -475,6 +475,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e1..49cfc6d2f2fb 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -29,7 +29,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10e89ae5a600..ee1044ca481d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,7 +75,7 @@ void switch_to_new_gdt(void) static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -93,8 +93,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; static int __init cachesize_setup(char *str) { @@ -250,21 +251,24 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } @@ -315,25 +319,6 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -411,21 +396,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 522a5f2e405d..b82479c10835 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -66,7 +66,7 @@ void switch_to_new_gdt(void) load_gdt(&gdt_descr); } -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -76,8 +76,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -178,44 +179,28 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -306,21 +291,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565fe..3cc9d92afd8f 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -21,21 +21,15 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); + int c_x86_vendor; }; -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +#define cpu_dev_register(cpu_devX) \ + static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; -struct cpu_vendor_dev { - int vendor; - struct cpu_dev *cpu_dev; -}; - -#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ - static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ - __attribute__((__section__(".x86cpuvendor.init"))) = \ - { cpu_vendor_id, cpu_dev } - -extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 13f8fa16b815..3f8c7283d816 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -442,14 +442,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, + .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); +cpu_dev_register(cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, + .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); +cpu_dev_register(nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d76..c5ac08124adc 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -303,9 +303,10 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); /* arch_initcall(intel_cpu_init); */ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f0..0a8128a240df 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -90,6 +90,7 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_ident = { "GenuineIntel" }, .c_early_init = early_init_intel, .c_init = init_intel, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8f..7c46e6ecedca 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -102,6 +102,7 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, .c_identify = transmeta_identify, + .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); +cpu_dev_register(transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d75..e777f79e0960 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, + .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); +cpu_dev_register(umc_cpu_dev); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index af5bdad84604..21b4f7eefaab 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - __x86cpuvendor_start = .; - *(.x86cpuvendor.init) - __x86cpuvendor_end = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } SECURITY_INIT . = ALIGN(4); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e88..201e81a91a95 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -168,13 +168,12 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - . = ALIGN(16); - __x86cpuvendor_start = .; - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - *(.x86cpuvendor.init) + __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + *(.x86_cpu_dev.init) } - __x86cpuvendor_end = .; SECURITY_INIT + __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { -- cgit v1.2.3 From a0854a46c5eb82588126421a9cbceb973384a644 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make 32bit support show_msr like 64 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ee1044ca481d..a79cf5c52b6a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -616,6 +616,49 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -644,6 +687,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) -- cgit v1.2.3 From 01b2e16a7a9be6573cba5d594d6659b3c6cb46a0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make get_mode_name of 64bit the same as 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b82479c10835..f1fb94e766bb 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -83,6 +83,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; + char *p, *q; if (c->extended_cpuid_level < 0x80000004) return 0; @@ -92,6 +93,19 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') + p++; + if (p != q) { + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ + } + return 1; } -- cgit v1.2.3 From 0a488a53d7ca46ac638c30079072c57e50cfcc7b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:47 +0200 Subject: x86: move 32bit related functions together Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 251 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 34 +++--- 2 files changed, 143 insertions(+), 142 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a79cf5c52b6a..2b2c170e8d69 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -58,6 +60,109 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -72,9 +177,6 @@ void switch_to_new_gdt(void) asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); } -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) @@ -95,14 +197,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -133,7 +227,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -150,7 +243,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n < 0x80000006) /* Some chips just has a large L1. */ return; - ecx = cpuid_ecx(0x80000006); + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; /* do processor-specific cache resizing */ @@ -167,48 +260,23 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ + l2size, ecx & 0xFF); } #ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -225,8 +293,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -236,10 +302,14 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); + } - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); } } #endif @@ -273,52 +343,6 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) this_cpu = &default_cpu; } - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -380,16 +404,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; + c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) return; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->extended_cpuid_level = 0; + cpu_detect(c); get_cpu_vendor(c); @@ -487,31 +511,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - /* * This does the hard work of actually picking apart the CPU stuff... */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f1fb94e766bb..b2da04135326 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -37,6 +37,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -78,7 +80,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -112,7 +113,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ebx, ecx, edx; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; @@ -125,15 +126,17 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_tlbsize = 0; } - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + l2size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -142,14 +145,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -175,6 +177,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); } + out: if ((c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", @@ -182,7 +185,6 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } - #endif } @@ -405,10 +407,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); generic_identify(c); -- cgit v1.2.3 From 6ac40ed0413ef4096720f966e11c7cdf259eee3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Sep 2008 10:41:22 -0700 Subject: x86: quick TSC calibration Introduce a fast TSC-calibration method on sane hardware. It only uses 17920 PIT timer ticks to calibrate the TSC, plus 256 ticks on each side to make sure the TSC values were very close to the tick, so the whole calibration takes 15ms. Yet, despite only takign 15ms, we can actually give pretty stringent guarantees of accuracy: - the code requires that we hit each 256-counter block at least 50 times, so the TSC error is basically at *MOST* just a few PIT cycles off in any direction. In practice, it's going to be about one microseconds off (which is how long it takes to read the counter) - so over 17920 PIT cycles, we can pretty much guarantee that the calibration error is less than one half of a percent. My testing bears this out: on my machine, the quick-calibration reports 2934.085kHz, while the slow one reports 2933.415. Yes, the slower calibration is still more precise. For me, the slow calibration is stable to within about one hundreth of a percent, so it's (at a guess) roughly an order-and-a-half of magnitude more precise. The longer you wait, the more precise you can be. However, the nice thing about the fast TSC PIT synchronization is that it's pretty much _guaranteed_ to give that 0.5% precision, and fail gracefully (and very quickly) if it doesn't get it. And it really is fairly simple (even if there's a lot of _details_ there, and I didn't get all of those right ont he first try or even the second ;) The patch says "110 insertions", but 63 of those new lines are actually comments. Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 110 insertions(+), 1 deletions(-) --- arch/x86/kernel/tsc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index da033b5b3e19..839070ba8465 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -227,6 +227,117 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) return delta; } +/* + * This reads the current MSB of the PIT counter, and + * checks if we are running on sufficiently fast and + * non-virtualized hardware. + * + * Our expectations are: + * + * - the PIT is running at roughly 1.19MHz + * + * - each IO is going to take about 1us on real hardware, + * but we allow it to be much faster (by a factor of 10) or + * _slightly_ slower (ie we allow up to a 2us read+counter + * update - anything else implies a unacceptably slow CPU + * or PIT for the fast calibration to work. + * + * - with 256 PIT ticks to read the value, we have 214us to + * see the same MSB (and overhead like doing a single TSC + * read per MSB value etc). + * + * - We're doing 2 reads per loop (LSB, MSB), and we expect + * them each to take about a microsecond on real hardware. + * So we expect a count value of around 100. But we'll be + * generous, and accept anything over 50. + * + * - if the PIT is stuck, and we see *many* more reads, we + * return early (and the next caller of pit_expect_msb() + * then consider it a failure when they don't see the + * next expected value). + * + * These expectations mean that we know that we have seen the + * transition from one expected value to another with a fairly + * high accuracy, and we didn't miss any events. We can thus + * use the TSC value at the transitions to calculate a pretty + * good value for the TSC frequencty. + */ +static inline int pit_expect_msb(unsigned char val) +{ + int count = 0; + + for (count = 0; count < 50000; count++) { + /* Ignore LSB */ + inb(0x42); + if (inb(0x42) != val) + break; + } + return count > 50; +} + +/* + * How many MSB values do we want to see? We aim for a + * 15ms calibration, which assuming a 2us counter read + * error should give us roughly 150 ppm precision for + * the calibration. + */ +#define QUICK_PIT_MS 15 +#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) + +static unsigned long quick_pit_calibrate(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Counter 2, mode 0 (one-shot), binary count + * + * NOTE! Mode 2 decrements by two (and then the + * output is flipped each time, giving the same + * final output frequency as a decrement-by-one), + * so mode 0 is much better when looking at the + * individual counts. + */ + outb(0xb0, 0x43); + + /* Start at 0xffff */ + outb(0xff, 0x42); + outb(0xff, 0x42); + + if (pit_expect_msb(0xff)) { + int i; + u64 t1, t2, delta; + unsigned char expect = 0xfe; + + t1 = get_cycles(); + for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { + if (!pit_expect_msb(expect)) + goto failed; + } + t2 = get_cycles(); + + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement QUICK_PIT_ITERATIONS + * times, and each MSB had many hits, so we never + * had any sudden jumps. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) + */ + delta = (t2 - t1)*PIT_TICK_RATE; + do_div(delta, QUICK_PIT_ITERATIONS*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; + } +failed: + return 0; +} /** * native_calibrate_tsc - calibrate the tsc on boot @@ -235,9 +346,15 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms; + unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + if (fast_calibrate) + return fast_calibrate; + /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes -- cgit v1.2.3 From 4156e9a8ef5b521185f451213d33fa661f38512e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Sep 2008 22:47:47 +0200 Subject: x86: quick TSC calibration, improve - make sure the final TSC timestamp is reliable too Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 839070ba8465..6dab90f68515 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -316,6 +316,12 @@ static unsigned long quick_pit_calibrate(void) } t2 = get_cycles(); + /* + * Make sure we can rely on the second TSC timestamp: + */ + if (!pit_expect_msb(--expect)) + goto failed; + /* * Ok, if we get here, then we've seen the * MSB of the PIT decrement QUICK_PIT_ITERATIONS -- cgit v1.2.3 From 97e4db7c8719f67c52793eeca5ec4df4c3407f2a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:08:59 -0700 Subject: x86: make detect_ht depend on CONFIG_X86_HT 64-bit has X86_HT set too, so use that instead of SMP. This also removes a include/asm-x86/processor.h ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d5a07f0fd24..1f80ce07daa2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -263,9 +263,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) l2size, ecx & 0xFF); } -#ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -311,8 +311,8 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } -} #endif +} static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index bcb48ce05d23..8e630734e1fd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -141,7 +141,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) void __cpuinit detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; -- cgit v1.2.3 From f0fc4aff1fa4db1c201593422dcbf85c9897ec4f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:00 -0700 Subject: x86: make header file the same in arch/x86/kernel/cpu/common_xx.c Make the files more similar in preparation to unification, no code changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 22 +++++++++++++++++++--- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f80ce07daa2..6bbdbc24f2ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,25 +1,41 @@ #include +#include +#include #include +#include +#include +#include +#include +#include #include #include -#include #include -#include -#include #include #include #include +#include #include #include #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include +#include #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include "cpu.h" static struct cpu_dev *this_cpu __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8e630734e1fd..5daec699acf0 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #endif #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include "cpu.h" -- cgit v1.2.3 From 950ad7ff6ec17fc1b47abc95c5c74628eb1adf8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:01 -0700 Subject: x86: same gdt_page with macro Move the 32-bit and 64-bit gdt_page definitions next to each other, separated with an #ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 17 +++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bbdbc24f2ba..e0ca51f4f2d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -40,6 +40,22 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +#else DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -74,6 +90,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int cachesize_override __cpuinitdata = -1; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 5daec699acf0..b4890504284d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -39,6 +39,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -53,6 +54,42 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, } }; +#else +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +} }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; -- cgit v1.2.3 From ba51dced0b55eb62874e1646d5eeff344c3d4e67 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:02 -0700 Subject: x86: cpu/common.c, let 64-bit code have 32-bit only functions No effect on 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 +++ arch/x86/kernel/cpu/common_64.c | 111 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0ca51f4f2d3..9128ba0c8d33 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -93,6 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -195,6 +196,13 @@ static int __init x86_serial_nr_setup(char *s) return 1; } __setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b4890504284d..40c9d89cc14a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -92,6 +92,117 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, -- cgit v1.2.3 From d5494d4f517158b1d2eea1ae33f6c264eb12675f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:03 -0700 Subject: x86: cpu/common*.c, make 32-bit have 64-bit only functions Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 12 ++++ 2 files changed, 150 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9128ba0c8d33..dcd3ebd5ba62 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,143 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +#else + /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -757,6 +894,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } +#endif /* * cpu_init() initializes state that is per-CPU. Some data is already diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 40c9d89cc14a..9f2a6ece82dd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -704,6 +704,7 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); @@ -838,6 +839,17 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +#else + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; + return regs; +} +#endif + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT -- cgit v1.2.3 From 1ba76586f778be327e452180d8378e40ee70f066 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:04 -0700 Subject: x86: cpu/common*.c have same cpu_init(), with copying and #ifdef hard to merge by lines... (as here we have material differences between 32-bit and 64-bit mode) - will try to do it later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 124 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 88 +++++++++++++++++++++++++++- 2 files changed, 211 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dcd3ebd5ba62..f44678db1616 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -901,7 +901,129 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} + +#else + void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -982,3 +1104,5 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif + +#endif diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 9f2a6ece82dd..2bd0ed5abb0a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -855,8 +855,9 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 void __cpuinit cpu_init(void) { int cpu = stack_smp_processor_id(); @@ -974,3 +975,88 @@ void __cpuinit cpu_init(void) if (is_uv_system()) uv_cpu_init(); } + +#else + +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif + +#endif -- cgit v1.2.3 From fab334c1d5f24d23d12f98ad652d399279cd03ce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:05 -0700 Subject: x86: cpu/common*.c, merge switch_to_new_gdt() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f44678db1616..43d5287bb2a4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -215,7 +215,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2bd0ed5abb0a..d7b996518f86 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -214,6 +214,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -- cgit v1.2.3 From b9e67f00424e164dcd29391eb48dc941db8691ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:06 -0700 Subject: x86: cpu/common.c, merge default_init() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 43d5287bb2a4..2c4bfa2e56ad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -224,6 +224,9 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -233,6 +236,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index d7b996518f86..2fda10974813 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -223,7 +223,19 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { -- cgit v1.2.3 From 140fc72709278989f08eb756d16a70008bdcc409 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:07 -0700 Subject: x86: cpu/common*.c, merge display_cacheinfo() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++++++ arch/x86/kernel/cpu/common_64.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c4bfa2e56ad..f9191207718b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -285,6 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -293,6 +297,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; +#ifdef CONFIG_X86_64 + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -303,6 +310,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2fda10974813..f7a2d524b1e7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -285,8 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -294,7 +296,22 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; + +#ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c, l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if (l2size == 0) + return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; -- cgit v1.2.3 From 1cd78776c7d5487e3000426d0ce1ff203c5d60cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:08 -0700 Subject: x86: cpu/common*.c, merge detect_ht() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++++++++- arch/x86/kernel/cpu/common_64.c | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f9191207718b..b2018f76c94d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -330,6 +330,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; @@ -346,8 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 + c->phys_proc_id = phys_pkg_id(index_msb); +#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -355,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); +#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); +#endif } out: diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f7a2d524b1e7..7ac0a00743c7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -349,7 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 c->phys_proc_id = phys_pkg_id(index_msb); +#else + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -357,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); +#else + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); +#endif } out: -- cgit v1.2.3 From 5122c890ba969e6efeaba9ed45f5936e62d3c6d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:09 -0700 Subject: x86: cpu/common.c: merge get_cpu_cap() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 2 ++ 2 files changed, 22 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2018f76c94d..ffc2f5ed09a3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -458,6 +458,26 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[6] = cpuid_ecx(0x80000001); } } + +#ifdef CONFIG_X86_64 + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +#endif } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7ac0a00743c7..75bb7ff99ee3 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -461,6 +461,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_X86_64 /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { @@ -478,6 +479,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#endif } /* Do some early cpuid on the boot CPU to get some parameter that are -- cgit v1.2.3 From 6627d2423067f2c6eedb422a59fba9270a3c5e36 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:10 -0700 Subject: x86: cpu/common*.c, merge early_identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ffc2f5ed09a3..61a70748213e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -490,7 +490,11 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 75bb7ff99ee3..6475548d64eb 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -482,15 +482,28 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) #endif } -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ +/* + * Do minimum CPU detection early. + * Fields really needed: vendor, cpuid_level, family, model, mask, + * cache alignment. + * The others are not touched to avoid unwanted side effects. + * + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. + */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; + if (!have_cpuid_p()) + return; + memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; -- cgit v1.2.3 From 56f0d033be2ebb983993e5d7f24ae232c9a1e7f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:11 -0700 Subject: x86: cpu/common*.c: merge print_cpu_info() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6475548d64eb..68b06a39dcac 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -730,8 +730,20 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk(KERN_CONT "%s ", vendor); + if (c->x86_model_id[0]) printk(KERN_CONT "%s", c->x86_model_id); + else + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) printk(KERN_CONT " stepping %02x\n", c->x86_mask); -- cgit v1.2.3 From b89d3b3e2caeab3d895301d1aee3cd2a91eddb79 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:12 -0700 Subject: x86: cpu/common*.c, merge generic_identify() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 15 ++++++++++++--- arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61a70748213e..f35baa7f3036 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -548,6 +548,10 @@ void __init early_cpu_init(void) * of early VIA chips and (more importantly) broken virtualizers that * are not easy to detect. Hence, probe for it based on first * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { @@ -586,11 +590,16 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; -#else +# else c->apicid = c->initial_apicid; +# endif +#endif + +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; #endif } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 68b06a39dcac..869a6ff9f7dc 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -581,6 +581,9 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + if (!have_cpuid_p()) + return; + c->extended_cpuid_level = 0; cpu_detect(c); @@ -589,11 +592,21 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) get_cpu_cap(c); - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); +# else + c->apicid = c->initial_apicid; +# endif #endif +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +#endif + } + if (c->extended_cpuid_level >= 0x80000004) get_model_name(c); /* Default name */ -- cgit v1.2.3 From 102bbe3ab83c7ac04461d277df23cd5acdb49892 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:13 -0700 Subject: x86: cpu/common*.c, merge identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 89 ++++++++++++++++++++---------- arch/x86/kernel/cpu/common_64.c | 116 ++++++++++++++++++++++++++++++---------- 2 files changed, 147 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f35baa7f3036..eef868c97b89 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -104,34 +104,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -197,13 +169,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -620,12 +627,18 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 + c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -644,6 +657,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); +#ifdef CONFIG_X86_64 + c->apicid = phys_pkg_id(0); +#endif + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -677,6 +694,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } +#ifdef CONFIG_X86_64 + detect_ht(c); +#endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -693,24 +714,34 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; +#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); +#endif select_idle_routine(c); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + numa_add_cpu(smp_processor_id()); +#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 enable_sep_cpu(); +#endif mtrr_ap_init(); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 869a6ff9f7dc..6a42371a609f 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,34 +103,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -196,13 +168,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -628,14 +635,35 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (!have_cpuid_p()) { + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + } + generic_identify(c); + if (this_cpu->c_identify) + this_cpu->c_identify(c); + +#ifdef CONFIG_X86_64 c->apicid = phys_pkg_id(0); +#endif /* * Vendor-specific initialization. In this section we @@ -650,7 +678,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* If the model name is still unset, do table lookup. */ + if (!c->x86_model_id[0]) { + char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + +#ifdef CONFIG_X86_64 detect_ht(c); +#endif /* * On SMP, boot_cpu_data holds the common feature set between @@ -669,11 +719,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] &= ~cleared_cpu_caps[i]; #ifdef CONFIG_X86_MCE + /* Init Machine Check Exception if available. */ mcheck_init(c); #endif select_idle_routine(c); -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif @@ -682,12 +733,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 + enable_sep_cpu(); +#endif mtrr_ap_init(); } -- cgit v1.2.3 From 143b604a2d07ff69cc2bc02ecd9395b28e0b5292 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 09:37:15 +0200 Subject: x86: cpu/common*.c, merge whitespaces Merge leftover whitespaces, to make arch/x86/kernel/cpu/common_64.c exactly identical to arch/x86/kernel/cpu/common.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6a42371a609f..eef868c97b89 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -26,6 +26,7 @@ #include #include #endif + #include #include #include @@ -280,7 +281,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -307,7 +307,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); #else - /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -334,6 +333,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; @@ -443,7 +443,6 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } - static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -452,7 +451,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -488,7 +486,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #endif } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -500,7 +497,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else @@ -722,12 +718,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_init(c); #endif + select_idle_routine(c); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif - } void __init identify_boot_cpu(void) -- cgit v1.2.3 From f5017cfa3591d8eaf5c792e40ff30f18e498b68c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:14 -0700 Subject: x86: use cpu/common.c on 64 bit Use cpu/common.c on both 64-bit and 32-bit and remove cpu/common_64.c. We started out with this linecount: 816 arch/x86/kernel/cpu/common_64.c 805 arch/x86/kernel/cpu/common.c and the resulting common.c is 1197 lines long, so there's already 424 lines of code eliminated in this phase of the unification. Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 6 +- arch/x86/kernel/cpu/common_64.c | 1197 --------------------------------------- 2 files changed, 3 insertions(+), 1200 deletions(-) delete mode 100644 arch/x86/kernel/cpu/common_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 403e689df0b8..d031f248dfc0 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,10 +3,10 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o capflags.o powerflags.o +obj-y += proc.o capflags.o powerflags.o common.o -obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o -obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index eef868c97b89..000000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null @@ -1,1197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -static struct cpu_dev *this_cpu __cpuinitdata; - -#ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -#else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; -#endif -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif -} - -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while (*p == ' ') - p++; - if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx, l2size; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - l2size = ecx >> 16; - -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if (l2size == 0) - return; /* Again, no L2 cache is possible */ -#endif - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif - } - -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; - } - } - - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; - } - } -} - -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - u32 ebx; - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } - -#ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -#endif -} -/* - * Do minimum CPU detection early. - * Fields really needed: vendor, cpuid_level, family, model, mask, - * cache alignment. - * The others are not touched to avoid unwanted side effects. - * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. - */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - if (!have_cpuid_p()) - return; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - validate_pat_support(c); -} - -void __init early_cpu_init(void) -{ - struct cpu_dev **cdev; - int count = 0; - - printk("KERNEL supported cpus:\n"); - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; - unsigned int j; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } - - early_identify_cpu(&boot_cpu_data); -} - -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -{ - if (!have_cpuid_p()) - return; - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif - -#ifdef CONFIG_X86_HT - c->phys_proc_id = c->initial_apicid; -#endif - } - - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ - - init_scattered_cpuid_features(c); - detect_nopl(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 - c->x86_coreid_bits = 0; - c->x86_clflush_size = 64; -#else - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - if (this_cpu->c_identify) - this_cpu->c_identify(c); - -#ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); -#endif - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* If the model name is still unset, do table lookup. */ - if (!c->x86_model_id[0]) { - char *p; - p = table_lookup_model(c); - if (p) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif - - select_idle_routine(c); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - numa_add_cpu(smp_processor_id()); -#endif -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -#ifdef CONFIG_X86_32 - sysenter_setup(); - enable_sep_cpu(); -#endif -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); -#ifdef CONFIG_X86_32 - enable_sep_cpu(); -#endif - mtrr_ap_init(); -} - -struct msr_range { - unsigned min; - unsigned max; -}; - -static struct msr_range msr_range_array[] __cpuinitdata = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __cpuinit print_cpu_msr(void) -{ - unsigned index; - u64 val; - int i; - unsigned index_min, index_max; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr __cpuinitdata; -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk(KERN_CONT "%s ", vendor); - - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - else - printk(KERN_CONT "%d86", c->x86); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); - -#ifdef CONFIG_SMP - if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -#ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#else - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->fs = __KERNEL_PERCPU; - return regs; -} -#endif - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit - */ -#ifdef CONFIG_X86_64 -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - if (cpu != 0 && x2apic) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_sp0(t, thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); - - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - - xsave_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif - -#endif -- cgit v1.2.3 From bd220a24a9263eaa70d5e6821383085950b5a13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Sep 2008 00:58:28 -0700 Subject: x86: move nonx_setup etc from common.c to init_64.c like 32 bit put it in init_32.c Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 54 -------------------------------------------- 1 file changed, 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eef868c97b89..286c89a991bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -847,51 +847,6 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - void pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -957,15 +912,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - unsigned long kernel_eflags; /* -- cgit v1.2.3 From 7cfb0435330364f90f274a26ecdc5f47f738498c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Sep 2008 21:37:24 +0000 Subject: HPET: make minimum reprogramming delta useful The minimum reprogramming delta was hardcoded in HPET ticks, which is stupid as it does not work with faster running HPETs. The C1E idle patches made this prominent on AMD/RS690 chipsets, where the HPET runs with 25MHz. Set it to 5us which seems to be a reasonable value and fixes the problems on the bug reporters machines. We have a further sanity check now in the clock events, which increases the delta when it is not sufficient. Signed-off-by: Thomas Gleixner Tested-by: Luiz Fernando N. Capitulino Tested-by: Dmitry Nezhevenko Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 59fd3b6b1303..2256315416d8 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void) /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); + /* 5 usec minimum reprogramming delta. */ + hpet_clockevent.min_delta_ns = 5000; /* * Start hpet with the boot cpu mask and make it -- cgit v1.2.3 From 551b4545bf9658dc5ae5a1277dcd4d7bf0028b28 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 5 Sep 2008 17:58:49 +0900 Subject: x86: gart alloc_coherent doesn't need to check NULL device argument asm/dma-mapping.h guarantees that gart alloc_coherent doesn't get NULL device argument. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d0864900b8c..0b99d4a06f74 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -506,9 +506,6 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; - if (!dev) - dev = &x86_dma_fallback_dev; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, align_mask); flush_gart(); -- cgit v1.2.3 From 913da64b54b2b3bb212a59aba2e6f2b8294ca1fa Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Wed, 3 Sep 2008 14:30:23 +0100 Subject: x86: build fix for !CONFIG_SMP Move reset_lazy_tlbstate into tlb_32.c, and define noop versions of play_dead() in process_{32,64}.c when !CONFIG_SMP. Signed-off-by: Alex Nixon Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 7 ------- arch/x86/kernel/process_32.c | 7 +++++++ arch/x86/kernel/process_64.c | 7 +++++++ arch/x86/kernel/tlb_32.c | 8 ++++++++ 4 files changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 18f5551b32dd..76d10d5c2fa7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -714,10 +714,3 @@ void __cpuinit cpu_init(void) mxcsr_feature_mask_init(); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 633cf06578fa..b76b38ff962b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -72,6 +72,13 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } +#ifndef CONFIG_SMP +static inline void play_dead(void) +{ + BUG(); +} +#endif + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa28ed01cdf3..ec27afa43d7e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -85,6 +85,13 @@ void exit_idle(void) __exit_idle(); } +#ifndef CONFIG_SMP +static inline void play_dead(void) +{ + BUG(); +} +#endif + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index fec1ecedc9b7..e00534b33534 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -241,3 +241,11 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } +void reset_lazy_tlbstate(void) +{ + int cpu = raw_smp_processor_id(); + + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} + -- cgit v1.2.3 From cf169702ba6928cee9d4f4adf3e932b643b8db7a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Sep 2008 13:13:40 +0200 Subject: x86, gart: add detection of AMD family 0x11 northbridges This patch adds the detection of the northbridges in the AMD family 0x11 processors. It also fixes the magic numbers there while changing this code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/k8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb21335..304d8bad6559 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); -- cgit v1.2.3 From e51af6630848406fc97adbd71443818cdcda297b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Sep 2008 09:54:37 +0100 Subject: x86: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. I'd still be grateful if someone could test it on a DG33BU with the old BIOS though, since I've killed mine. I tested the DMI version, but not this one. Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..24bb5faf5efa 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) } +#ifdef CONFIG_DMAR +static void __init intel_g33_dmar(int num, int slot, int func) +{ + struct acpi_table_header *dmar_tbl; + acpi_status status; + + status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); + if (ACPI_SUCCESS(status)) { + printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); + dmar_disabled = 1; + } +} +#endif + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, +#ifdef CONFIG_DMAR + { PCI_VENDOR_ID_INTEL, 0x29c0, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, +#endif {} }; -- cgit v1.2.3 From b6734c35af028f06772c0b2c836c7d579e6d4dad Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/feature_names.c | 3 ++- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..0785b3c8d043 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017e..c3afba5a81a7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..c9017799497c 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ -- cgit v1.2.3 From f31d731e4467e61de51d7f6d7115f3b712d9354c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..65a0c1b48696 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From f7676254f179eac6b5244a80195ec8ae0e9d4606 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Sep 2008 03:03:32 +0200 Subject: x86: HPET fix moronic 32/64bit thinko We use the HPET only in 32bit mode because: 1) some HPETs are 32bit only 2) on i386 there is no way to read/write the HPET atomic 64bit wide The HPET code unification done by the "moron of the year" did not take into account that unsigned long is different on 32 and 64 bit. This thinko results in a possible endless loop in the clockevents code, when the return comparison fails due to the 64bit/332bit unawareness. unsigned long cnt = (u32) hpet_read() + delta can wrap over 32bit. but the final compare will fail and return -ETIME causing endless loops. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 2256315416d8..801497a16e0e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -270,15 +270,15 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, } static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) + struct clock_event_device *evt) { - unsigned long cnt; + u32 cnt; cnt = hpet_readl(HPET_COUNTER); - cnt += delta; + cnt += (u32) delta; hpet_writel(cnt, HPET_T0_CMP); - return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; + return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } /* -- cgit v1.2.3 From 72d43d9bc9210d24d09202eaf219eac09e17b339 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Sep 2008 03:06:08 +0200 Subject: x86: HPET: read back compare register before reading counter After fixing the u32 thinko I sill had occasional hickups on ATI chipsets with small deltas. There seems to be a delay between writing the compare register and the transffer to the internal register which triggers the interrupt. Reading back the value makes sure, that it hit the internal match register befor we compare against the counter value. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 801497a16e0e..73deaffadd03 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -278,6 +278,13 @@ static int hpet_legacy_next_event(unsigned long delta, cnt += (u32) delta; hpet_writel(cnt, HPET_T0_CMP); + /* + * We need to read back the CMP register to make sure that + * what we wrote hit the chip before we compare it to the + * counter. + */ + WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 1b05d60d60e81c6594da8298107a05b506f01797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:27 -0700 Subject: x86: remove duplicated get_model_name() calling Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 3 --- arch/x86/kernel/cpu/amd_64.c | 3 +-- arch/x86/kernel/cpu/centaur.c | 1 - arch/x86/kernel/cpu/common.c | 9 +++------ arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/transmeta.c | 1 - 7 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d64ea6097ca7..e0ba2c7c5a18 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -42,7 +42,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; #ifdef CONFIG_SMP unsigned long long value; @@ -75,8 +74,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ clear_cpu_cap(c, 0*32+31); - r = get_model_name(c); - switch (c->x86) { case 4: /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1c721c0c49f..c5fbf7477ead 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -157,8 +157,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - level = get_model_name(c); - if (!level) { + if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: /* Should distinguish Models here, but this is only diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5f6d89521bf..89bfdd9cacc6 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); - get_model_name(c); display_cacheinfo(c); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 286c89a991bd..a8b9b7242428 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -252,13 +252,13 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -int __cpuinit get_model_name(struct cpuinfo_x86 *c) +static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; if (c->extended_cpuid_level < 0x80000004) - return 0; + return; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -277,8 +277,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } - - return 1; } void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) @@ -610,8 +608,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #endif } - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ + get_model_name(c); /* Default name */ init_scattered_cpuid_features(c); detect_nopl(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3cc9d92afd8f..de4094a39210 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -31,7 +31,6 @@ struct cpu_dev { extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; -extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3f8c7283d816..ffd0f5ed071a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); - get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 7c46e6ecedca..738e03244f95 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ -- cgit v1.2.3 From e3224234717b4228c235cee401af89212f17a3a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:28 -0700 Subject: x86, cpu init: call early_init_xxx in init_xxx so we: 1. could set some cap to ap 2. restore some cap after memset in identify_cpu for boot cpu esp for CONSTANT_TSC this matters, as: before this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs after this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs so constant_tsc is back... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 ++----- arch/x86/kernel/cpu/amd_64.c | 2 ++ arch/x86/kernel/cpu/centaur_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/cpu/intel_64.c | 2 ++ 5 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e0ba2c7c5a18..c3175da7bc69 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,11 +26,8 @@ __asm__(".align 4\nvide: ret"); static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - } + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); /* Set MTRR capability flag if appropriate */ if (c->x86_model == 13 || c->x86_model == 9 || diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c5fbf7477ead..8c2d07f06f1d 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -140,6 +140,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 49cfc6d2f2fb..0e5cf17a3c89 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,6 +16,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { + early_init_centaur(c); + if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8b9b7242428..e8045c4ef1c1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -473,9 +473,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); @@ -483,6 +480,10 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } #endif + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 0c0a58dfe099..14a2cd98d480 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,6 +54,8 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + early_init_intel(c); + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); -- cgit v1.2.3 From bb57925f5057837c248b57a51181fd6b138a32f3 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:26:55 -0700 Subject: x86_32: signal: use syscall_get_nr and error Use asm/syscall.h interfaces that do the same things. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b21070ea33a4..3e4a688bb84f 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "sigframe.h" @@ -507,9 +508,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (regs->ax) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -623,9 +624,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (regs->ax) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v1.2.3 From 72fa50f4ef9014f4212945b766af84ea94308903 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:27:11 -0700 Subject: x86_32: signal: introduce signal_fault() implement signal_fault() for 32bit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 3e4a688bb84f..76d05d703845 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -243,7 +243,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) return ax; badframe: - force_sig(SIGSEGV, current); + signal_fault(regs, frame, "rt sigreturn"); return 0; } @@ -669,3 +669,19 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) clear_thread_flag(TIF_IRET); } + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where) +{ + struct task_struct *me = current; + + if (show_unhandled_signals && printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", + me->comm, me->pid, where, frame, + regs->ip, regs->sp, regs->orig_ax); + print_vma_addr(" in ", regs->ip); + printk(KERN_CONT "\n"); + } + + force_sig(SIGSEGV, me); +} -- cgit v1.2.3 From 8fcd8e20f388d787f6abf701b11037b122029d5b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:27:39 -0700 Subject: x86: signal: make NR_restart_syscall make NR_restart_syscall macro for cosmetic unification of handle_signal(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 3 ++- arch/x86/kernel/signal_64.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 76d05d703845..bd9b65031a9a 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -572,6 +572,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return 0; } +#define NR_restart_syscall __NR_restart_syscall /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -635,7 +636,7 @@ static void do_signal(struct pt_regs *regs) break; case -ERESTART_RESTARTBLOCK: - regs->ax = __NR_restart_syscall; + regs->ax = NR_restart_syscall; regs->ip -= 2; break; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 823a55bf8c39..19e2b9143205 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -362,6 +362,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +#define NR_restart_syscall \ + test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -423,9 +425,7 @@ static void do_signal(struct pt_regs *regs) regs->ip -= 2; break; case -ERESTART_RESTARTBLOCK: - regs->ax = test_thread_flag(TIF_IA32) ? - __NR_ia32_restart_syscall : - __NR_restart_syscall; + regs->ax = NR_restart_syscall; regs->ip -= 2; break; } -- cgit v1.2.3 From 1d13024e624d0e2e47f117b383917249a41ef5ce Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:28:06 -0700 Subject: x86: signal: split out frame setups Make setup_rt_frame() and split out frame setups from handle_signal(). This is for cosmetic unification of handle_signal(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 29 ++++++++++++++++++++--------- arch/x86/kernel/signal_64.c | 30 ++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index bd9b65031a9a..48982f7ce886 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -338,8 +338,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } static int -setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, - struct pt_regs *regs) +__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) { struct sigframe __user *frame; void __user *restorer; @@ -416,8 +416,8 @@ give_sigsegv: return -EFAULT; } -static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; void __user *restorer; @@ -501,6 +501,21 @@ give_sigsegv: /* * OK, we're invoking a handler: */ +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + ret = __setup_rt_frame(sig, ka, info, set, regs); + else + ret = __setup_frame(sig, ka, set, regs); + + return ret; +} + static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) @@ -537,11 +552,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, likely(test_and_clear_thread_flag(TIF_FORCED_TF))) regs->flags &= ~X86_EFLAGS_TF; - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(sig, ka, info, oldset, regs); - else - ret = setup_frame(sig, ka, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret) return ret; diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 19e2b9143205..8fbdd23d5cc6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -195,8 +195,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) return (void __user *)round_down(sp - size, 64); } -static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; void __user *fp = NULL; @@ -280,6 +280,24 @@ give_sigsegv: /* * OK, we're invoking a handler */ +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + int ret; + +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = ia32_setup_rt_frame(sig, ka, info, set, regs); + else + ret = ia32_setup_frame(sig, ka, set, regs); + } else +#endif + ret = __setup_rt_frame(sig, ka, info, set, regs); + + return ret; +} static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -317,14 +335,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, likely(test_and_clear_thread_flag(TIF_FORCED_TF))) regs->flags &= ~X86_EFLAGS_TF; -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) { - if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); - else - ret = ia32_setup_frame(sig, ka, oldset, regs); - } else -#endif ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret == 0) { -- cgit v1.2.3 From 13ad7725e9955ac68fff670a039da99ab33e86a0 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:28:38 -0700 Subject: x86_32: signal: move signal number conversion to upper layer Bring signal number conversion in __setup_frame() and __setup_rt_frame() up into the common part setup_frame(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 48982f7ce886..b668efc18eae 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -344,7 +344,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct sigframe __user *frame; void __user *restorer; int err = 0; - int usig; void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -352,13 +351,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; - - err = __put_user(usig, &frame->sig); + err = __put_user(sig, &frame->sig); if (err) goto give_sigsegv; @@ -422,7 +415,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; - int usig; void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -430,13 +422,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; - - err |= __put_user(usig, &frame->sig); + err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, info); @@ -482,7 +468,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; regs->ip = (unsigned long)ka->sa.sa_handler; - regs->ax = (unsigned long)usig; + regs->ax = (unsigned long)sig; regs->dx = (unsigned long)&frame->info; regs->cx = (unsigned long)&frame->uc; @@ -506,12 +492,19 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { int ret; + int usig; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(sig, ka, info, set, regs); + ret = __setup_rt_frame(usig, ka, info, set, regs); else - ret = __setup_frame(sig, ka, set, regs); + ret = __setup_frame(usig, ka, set, regs); return ret; } -- cgit v1.2.3 From 12cf105cd66d95cf32c73cfa847a50bd1b700f23 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d043..8aab8517642e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* -- cgit v1.2.3 From dd786dd12c99634055a9066f25ea957f29991c22 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc3031..18514ed26104 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..a0534c04d38a 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index e710a21bb6e8..898a5a2002ed 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; -- cgit v1.2.3 From a19aac8548d85cf15d744335b8e82201da760d80 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Aug 2008 06:03:34 +0400 Subject: x86: make setup_xstate_init() __init WARNING: vmlinux.o(.text+0x22453): Section mismatch in reference from the function setup_xstate_init() to the function .init.text:__alloc_bootmem() The function setup_xstate_init() references the function __init __alloc_bootmem(). This is often because setup_xstate_init lacks a __init annotation or the annotation of __alloc_bootmem is wrong. Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 07713d64debe..ed5274a5bb0f 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -272,7 +272,7 @@ void __cpuinit xsave_init(void) /* * setup the xstate image representing the init state */ -void setup_xstate_init(void) +static void __init setup_xstate_init(void) { init_xstate_buf = alloc_bootmem(xstate_size); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; -- cgit v1.2.3 From 30cec97967beb5aa08e893e8ff69623d5fecd9b7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 12:49:55 +0100 Subject: x86: init annotations in early_printk() setup Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 825e9136b026..02fc5b40c14b 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -120,7 +120,7 @@ static __init void early_serial_init(char *s) if (!strncmp(s, "0x", 2)) { early_serial_base = simple_strtoul(s, &e, 16); } else { - static int bases[] = { 0x3f8, 0x2f8 }; + static const int __initconst bases[] = { 0x3f8, 0x2f8 }; if (!strncmp(s, "ttyS", 4)) s += 4; @@ -920,7 +920,7 @@ static struct console simnow_console = { /* Direct interface for emergencies */ static struct console *early_console = &early_vga_console; -static int early_console_initialized; +static int __initdata early_console_initialized; asmlinkage void early_printk(const char *fmt, ...) { -- cgit v1.2.3 From 2d9cd6c27f00958a31622b8e9909d5e35f069b28 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 13:15:04 +0100 Subject: x86-64: add two __cpuinit annotations Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e8045c4ef1c1..5cde4b18448c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -845,7 +845,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -void pda_init(int cpu) +void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 726a5fcdf341..4b031a4ac856 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -860,7 +860,7 @@ error: return err; } -static void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_remove_device(unsigned int cpu) { int i; -- cgit v1.2.3 From d04ec773d7ca1bbc05a2768be95c1cebe2b07757 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index c3afba5a81a7..4f2eeb5652ea 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -529,17 +529,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v1.2.3 From 23952a96ae738277f3139b63d622e22984589031 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 4f2eeb5652ea..a11f5d4477cd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -640,19 +640,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v1.2.3 From 5df45515512436a808d3476a90e83f2efb022422 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 23:55:40 +0200 Subject: x86, tsc calibration: fix my brown paperbag day ... Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6dab90f68515..4847a9280505 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -319,7 +319,7 @@ static unsigned long quick_pit_calibrate(void) /* * Make sure we can rely on the second TSC timestamp: */ - if (!pit_expect_msb(--expect)) + if (!pit_expect_msb(expect)) goto failed; /* -- cgit v1.2.3 From 5394f80f92642c61fc2a95385be85f2fdcfb5adb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 01:51:32 -0700 Subject: x86: check for and defend against BIOS memory corruption Some BIOSes have been observed to corrupt memory in the low 64k. This change: - Reserves all memory which does not have to be in that area, to prevent it from being used as general memory by the kernel. Things like the SMP trampoline are still in the memory, however. - Clears the reserved memory so we can observe changes to it. - Adds a function check_for_bios_corruption() which checks and reports on memory becoming unexpectedly non-zero. Currently it's called in the x86 fault handler, and the powermanagement debug output. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d38..ee89ebc5aabc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -578,6 +578,89 @@ static struct x86_quirks default_x86_quirks __initdata; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; +/* + * Some BIOSes seem to corrupt the low 64k of memory during events + * like suspend/resume and unplugging an HDMI cable. Reserve all + * remaining free memory in that area and fill it with a distinct + * pattern. + */ +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +#define MAX_SCAN_AREAS 8 +static struct e820entry scan_areas[MAX_SCAN_AREAS]; +static int num_scan_areas; + +static void __init setup_bios_corruption_check(void) +{ + u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ + + while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) { + u64 size; + addr = find_e820_area_size(addr, &size, PAGE_SIZE); + + if (addr == 0) + break; + + if ((addr + size) > 0x10000) + size = 0x10000 - addr; + + if (size == 0) + break; + + e820_update_range(addr, size, E820_RAM, E820_RESERVED); + scan_areas[num_scan_areas].addr = addr; + scan_areas[num_scan_areas].size = size; + num_scan_areas++; + + /* Assume we've already mapped this early memory */ + memset(__va(addr), 0, size); + + addr += size; + } + + printk(KERN_INFO "scanning %d areas for BIOS corruption\n", + num_scan_areas); + update_e820(); +} + +static int __read_mostly bios_corruption_check = 1; + +void check_for_bios_corruption(void) +{ + int i; + int corruption = 0; + + if (!bios_corruption_check) + return; + + for(i = 0; i < num_scan_areas; i++) { + unsigned long *addr = __va(scan_areas[i].addr); + unsigned long size = scan_areas[i].size; + + for(; size; addr++, size -= sizeof(unsigned long)) { + if (!*addr) + continue; + printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", + addr, __pa(addr), *addr); + corruption = 1; + *addr = 0; + } + } + + if (corruption) + dump_stack(); +} + +static int set_bios_corruption_check(char *arg) +{ + char *end; + + bios_corruption_check = simple_strtol(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("bios_corruption_check", set_bios_corruption_check); +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -750,6 +833,10 @@ void __init setup_arch(char **cmdline_p) high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION + setup_bios_corruption_check(); +#endif + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< Date: Sun, 7 Sep 2008 01:51:33 -0700 Subject: x86: add periodic corruption check Perodically check for corruption in low phusical memory. Don't bother checking at fault time, since it won't show anything useful. Signed-off-by: Hugh Dickins Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ee89ebc5aabc..c239b3780973 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -623,6 +623,7 @@ static void __init setup_bios_corruption_check(void) } static int __read_mostly bios_corruption_check = 1; +static struct timer_list periodic_check_timer; void check_for_bios_corruption(void) { @@ -650,6 +651,22 @@ void check_for_bios_corruption(void) dump_stack(); } +static void periodic_check_for_corruption(unsigned long data) +{ + check_for_bios_corruption(); + mod_timer(&periodic_check_timer, jiffies + 60*HZ); +} + +void start_periodic_check_for_corruption(void) +{ + if (!bios_corruption_check) + return; + + init_timer(&periodic_check_timer); + periodic_check_timer.function = &periodic_check_for_corruption; + periodic_check_for_corruption(0); +} + static int set_bios_corruption_check(char *arg) { char *end; -- cgit v1.2.3 From 9f077871ce7237e2387fc76542b3b4033cb05e49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 01:51:34 -0700 Subject: x86: clean up memory corruption check and add more kernel parameters The corruption check is enabled in Kconfig by default, but disabled at runtime. This patch adds several kernel parameters to control the corruption check's behaviour; these are documented in kernel-parameters.txt. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 80 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c239b3780973..27ae91288855 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -586,22 +586,71 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; */ #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION #define MAX_SCAN_AREAS 8 + +static int __read_mostly memory_corruption_check = 0; +static unsigned __read_mostly corruption_check_size = 64*1024; +static unsigned __read_mostly corruption_check_period = 60; /* seconds */ + static struct e820entry scan_areas[MAX_SCAN_AREAS]; static int num_scan_areas; + +static int set_corruption_check(char *arg) +{ + char *end; + + memory_corruption_check = simple_strtol(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("memory_corruption_check", set_corruption_check); + +static int set_corruption_check_period(char *arg) +{ + char *end; + + corruption_check_period = simple_strtoul(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("memory_corruption_check_period", set_corruption_check_period); + +static int set_corruption_check_size(char *arg) +{ + char *end; + unsigned size; + + size = memparse(arg, &end); + + if (*end == '\0') + corruption_check_size = size; + + return (size == corruption_check_size) ? 0 : -EINVAL; +} +early_param("memory_corruption_check_size", set_corruption_check_size); + + static void __init setup_bios_corruption_check(void) { u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ - while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) { + if (corruption_check_size == 0) + memory_corruption_check = 0; + + if (!memory_corruption_check) + return; + + corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); + + while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { u64 size; addr = find_e820_area_size(addr, &size, PAGE_SIZE); if (addr == 0) break; - if ((addr + size) > 0x10000) - size = 0x10000 - addr; + if ((addr + size) > corruption_check_size) + size = corruption_check_size - addr; if (size == 0) break; @@ -617,12 +666,11 @@ static void __init setup_bios_corruption_check(void) addr += size; } - printk(KERN_INFO "scanning %d areas for BIOS corruption\n", + printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); update_e820(); } -static int __read_mostly bios_corruption_check = 1; static struct timer_list periodic_check_timer; void check_for_bios_corruption(void) @@ -630,7 +678,7 @@ void check_for_bios_corruption(void) int i; int corruption = 0; - if (!bios_corruption_check) + if (!memory_corruption_check) return; for(i = 0; i < num_scan_areas; i++) { @@ -647,35 +695,27 @@ void check_for_bios_corruption(void) } } - if (corruption) - dump_stack(); + WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); } static void periodic_check_for_corruption(unsigned long data) { check_for_bios_corruption(); - mod_timer(&periodic_check_timer, jiffies + 60*HZ); + mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ); } void start_periodic_check_for_corruption(void) { - if (!bios_corruption_check) + if (!memory_corruption_check || corruption_check_period == 0) return; + printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", + corruption_check_period); + init_timer(&periodic_check_timer); periodic_check_timer.function = &periodic_check_for_corruption; periodic_check_for_corruption(0); } - -static int set_bios_corruption_check(char *arg) -{ - char *end; - - bios_corruption_check = simple_strtol(arg, &end, 10); - - return (*end == 0) ? 0 : -EINVAL; -} -early_param("bios_corruption_check", set_bios_corruption_check); #endif /* -- cgit v1.2.3 From c885df50f571faf9fd9f395361cfff1b3a16e06e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 02:37:32 -0700 Subject: x86: default corruption check to off, but put parameter default in Kconfig Default the low memory corruption check to off, but make the default setting of the memory_corruption_check kernel parameter a config parameter. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 27ae91288855..ec7e56c1b984 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -587,7 +587,8 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION #define MAX_SCAN_AREAS 8 -static int __read_mostly memory_corruption_check = 0; +static int __read_mostly memory_corruption_check = -1; + static unsigned __read_mostly corruption_check_size = 64*1024; static unsigned __read_mostly corruption_check_period = 60; /* seconds */ @@ -634,6 +635,16 @@ static void __init setup_bios_corruption_check(void) { u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ + if (memory_corruption_check == -1) { + memory_corruption_check = +#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK + 1 +#else + 0 +#endif + ; + } + if (corruption_check_size == 0) memory_corruption_check = 0; -- cgit v1.2.3 From 11fdd252bb5b461289fc5c21dc8fc87dc66f3284 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:50 -0700 Subject: x86: cpu make amd.c more like amd_64.c v2 1. make 32bit have early_init_amd_mc and amd_detect_cmp 2. seperate init_amd_k5/k6/k7 ... v2: fix compiling for !CONFIG_SMP Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 396 ++++++++++++++++++++++++------------------- arch/x86/kernel/cpu/amd_64.c | 17 +- arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 235 insertions(+), 180 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c3175da7bc69..a3a9e3cbdea9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,200 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits; + + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; +#endif +} + static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { + early_init_amd_mc(c); + if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); @@ -37,9 +229,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - #ifdef CONFIG_SMP unsigned long long value; @@ -50,7 +239,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -73,192 +262,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; + init_amd_k5(c); + break; case 5: - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - break; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_cpu_cap(c, X86_FEATURE_K8); + init_amd_k6(c); break; - case 6: - set_cpu_cap(c, X86_FEATURE_K7); + case 6: /* An Athlon/Duron */ + init_amd_k7(c); break; } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - display_cacheinfo(c); + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } - if (cpuid_eax(0x80000000) >= 0x80000008) - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + display_cacheinfo(c); -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distinguish the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif + detect_ht(c); - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; else num_cache_leaves = 3; } - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 8c2d07f06f1d..7913e48f6739 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -174,17 +174,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } if (c->x86 >= 0xf && c->x86 <= 0x11) set_cpu_cap(c, X86_FEATURE_K8); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } if (c->x86 == 0x10) { /* do this for boot cpu */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5cde4b18448c..9a57106c1995 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -629,8 +629,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else c->cpuid_level = -1; /* CPUID not detected */ -- cgit v1.2.3 From c58606ad551e9a1c85a9bd4f1698ca41ec279b2c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:51 -0700 Subject: x86: remove duplicated force_mwait Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 2 -- arch/x86/kernel/process.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7913e48f6739..466a1eaa81ea 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -9,8 +9,6 @@ #include "cpu.h" -int force_mwait __cpuinitdata; - #ifdef CONFIG_NUMA static int __cpuinit nearby_node(int apicid) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..9f94bb1c8117 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,7 +15,6 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; -static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { -- cgit v1.2.3 From 2b86473604f6e9aef0b5e89c56798a90ccddcdbe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:52 -0700 Subject: x86: add srat_detect_node for amd64 separate that from amd_detect_cmp() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 466a1eaa81ea..20c3f12dfe78 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -36,19 +36,23 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif + bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; +#endif +} +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ #ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; @@ -76,7 +80,6 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif -#endif } static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) @@ -169,8 +172,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) -- cgit v1.2.3 From 8d71a2ea0ad4ef9b9076ffd44726bad1f0ccf59b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:53 -0700 Subject: x86: merge header in amd_64.c Singed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 8 ++++++++ arch/x86/kernel/cpu/amd_64.c | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a3a9e3cbdea9..3c8090d10053 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,11 +1,19 @@ #include #include #include + #include #include #include +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif + #include + #include "cpu.h" /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 20c3f12dfe78..1d83601e85ef 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -1,9 +1,16 @@ #include +#include #include -#include -#include -#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif #include -- cgit v1.2.3 From 6c62aa4a3c12989a1f1fcbbe6f1ee5d4de4b2300 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:54 -0700 Subject: x86: make amd.c have 64bit support code Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 137 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3c8090d10053..32e73520adf7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -16,6 +16,7 @@ #include "cpu.h" +#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -177,6 +178,26 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K7); } +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. @@ -196,6 +217,42 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -226,13 +283,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -256,18 +319,31 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + u32 level; + + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + switch (c->x86) { case 4: init_amd_k5(c); @@ -283,7 +359,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); +#endif + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); @@ -300,10 +378,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } +#ifdef CONFIG_X86_32 detect_ht(c); +#endif if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) @@ -319,8 +401,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } + +#ifdef CONFIG_X86_64 + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -333,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -349,9 +463,10 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_size_cache = amd_size_cache, .c_x86_vendor = X86_VENDOR_AMD, }; -- cgit v1.2.3 From 2a02505055fdd44958efd0e140dd87cb9fdecaf1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:55 -0700 Subject: x86: make amd_64 have 32 bit code Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 259 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 247 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1d83601e85ef..32e73520adf7 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -16,7 +16,171 @@ #include "cpu.h" -#ifdef CONFIG_NUMA +#ifdef CONFIG_X86_32 +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) static int __cpuinit nearby_node(int apicid) { int i, node; @@ -41,7 +205,7 @@ static int __cpuinit nearby_node(int apicid) */ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits; bits = c->x86_coreid_bits; @@ -55,7 +219,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; unsigned apicid = hard_smp_processor_id(); @@ -91,7 +255,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits, ecx; /* Multi core CPU? */ @@ -112,7 +276,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) } c->x86_coreid_bits = bits; - #endif } @@ -124,15 +287,21 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - unsigned level; - #ifdef CONFIG_SMP - unsigned long value; + unsigned long long value; /* * Disable TLB flush filter by setting HWCR.FFDIS on K8 @@ -142,26 +311,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 122 for all steppings (F+ have it disabled by default) */ if (c->x86 == 0xf) { - rdmsrl(MSR_K8_HWCR, value); + rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); + wrmsrl(MSR_K7_HWCR, value); } #endif early_init_amd(c); - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 /* On C+ stepping K8 rep microcode works well for copy/memset */ if (c->x86 == 0xf) { + u32 level; + level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + switch (c->x86) { + case 4: + init_amd_k5(c); + break; + case 5: + init_amd_k6(c); + break; + case 6: /* An Athlon/Duron */ + init_amd_k7(c); + break; + } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); +#endif /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) @@ -176,6 +374,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } } + display_cacheinfo(c); /* Multi core CPU? */ @@ -184,6 +383,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; @@ -199,6 +402,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } +#ifdef CONFIG_X86_64 if (c->x86 == 0x10) { /* do this for boot cpu */ if (c == &boot_cpu_data) @@ -225,11 +429,42 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_memory_4k((unsigned long)__va(tseg), 1); } } +#endif +} + +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, -- cgit v1.2.3 From ff73152ced60871f7d5fb7dee52fa499902a3c6d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:56 -0700 Subject: x86: make 64 bit to use amd.c arch/x86/kernel/cpu/amd.c is now 100% identical to arch/x86/kernel/cpu/amd_64.c, so use amd.c on 64-bit too and fix up the namespace impact. Simplify the Kconfig glue as well. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/amd_64.c | 473 ------------------------------------------- 2 files changed, 1 insertion(+), 475 deletions(-) delete mode 100644 arch/x86/kernel/cpu/amd_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d031f248dfc0..510d1bcb058a 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,8 +10,7 @@ obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o -obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o -obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index 32e73520adf7..000000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null @@ -1,473 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_X86_64 -# include -# include -# include -#endif - -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_32 -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); -} -#endif - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; -#endif -} - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - int cpu = smp_processor_id(); - int node; - unsigned apicid = hard_smp_processor_id(); - - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned long long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - early_init_amd(c); - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } - -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - - if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} -#endif - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_size_cache = amd_size_cache, -#endif - .c_early_init = early_init_amd, - .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, -}; - -cpu_dev_register(amd_cpu_dev); -- cgit v1.2.3 From f69feff720497237ae9dd2f4604921bd3080c421 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:58 -0700 Subject: x86: little clean up of intel.c/intel_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- arch/x86/kernel/cpu/intel_64.c | 8 +++----- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 959417b8cd64..4a8ac9d4ff50 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -76,7 +76,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -183,7 +183,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); detect_ht(c); } @@ -210,9 +210,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) { + if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); - } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 14a2cd98d480..aef4f2826313 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -71,17 +71,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) ds_init_intel(c); + } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) -- cgit v1.2.3 From de9f521fb72dd091aa4989fe2e004ecf4785a850 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:11 +0900 Subject: x86: move pci-nommu's dma_mask check to common code The check to see if dev->dma_mask is NULL in pci-nommu is more appropriate for dma_alloc_coherent(). Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 73853d3fdcac..0f51883cc6a8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,9 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - if (hwdev->dma_mask == NULL) - return NULL; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; -- cgit v1.2.3 From 8a53ad675f86ee003482b557da944e070d3c4859 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:12 +0900 Subject: x86: fix nommu_alloc_coherent allocation with NULL device argument We need to use __GFP_DMA for NULL device argument (fallback_dev) with pci-nommu. It's a hack for ISA (and some old code) so we need to use GFP_DMA. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 0f51883cc6a8..ada1c87cafc2 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,7 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; dma_mask = hwdev->coherent_dma_mask; @@ -93,7 +92,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, node = dev_to_node(hwdev); #ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif -- cgit v1.2.3 From 823e7e8c6ef12cd1943dc42fe7595ca74e8cc3d7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:13 +0900 Subject: x86: dma_alloc_coherent sets gfp flags properly Non real IOMMU implemenations (which doesn't do virtual mappings, e.g. swiotlb, pci-nommu, etc) need to use proper gfp flags and dma_mask to allocate pages in their own dma_alloc_coherent() (allocated page need to be suitable for device's coherent_dma_mask). This patch makes dma_alloc_coherent do this job so that IOMMUs don't need to take care of it any more. Real IOMMU implemenataions can simply ignore the gfp flags. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index ada1c87cafc2..8e398b56f50b 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,26 +80,11 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp |= __GFP_ZERO; - - dma_mask = hwdev->coherent_dma_mask; - if (!dma_mask) - dma_mask = *(hwdev->dma_mask); + dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - if (dma_mask < DMA_24BIT_MASK) - return NULL; + gfp |= __GFP_ZERO; node = dev_to_node(hwdev); - -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - - /* No alloc-free penalty for ISA devices */ - if (dma_mask == DMA_24BIT_MASK) - gfp |= GFP_DMA; - again: page = alloc_pages_node(node, gfp, get_order(size)); if (!page) -- cgit v1.2.3 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel function would need that notification, e.g. kvm works around by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f916..0b8261c3cac2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); + notify_cpu_starting(cpuid); /* * Get our bogomips. * -- cgit v1.2.3 From d6be118a97ce51ca84035270f91c2bccecbfac5f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 9 Sep 2008 09:56:08 -0400 Subject: x86: fix memmap=exactmap boot argument When using kdump modifying the e820 map is yielding strange results. For example starting with BIOS-provided physical RAM map: BIOS-e820: 0000000000000100 - 0000000000093400 (usable) BIOS-e820: 0000000000093400 - 00000000000a0000 (reserved) BIOS-e820: 0000000000100000 - 000000003fee0000 (usable) BIOS-e820: 000000003fee0000 - 000000003fef3000 (ACPI data) BIOS-e820: 000000003fef3000 - 000000003ff80000 (ACPI NVS) BIOS-e820: 000000003ff80000 - 0000000040000000 (reserved) BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved) BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved) BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved) and booting with args memmap=exactmap memmap=640K@0K memmap=5228K@16384K memmap=125188K@22252K memmap=76K#1047424K memmap=564K#1047500K resulted in: user-defined physical RAM map: user: 0000000000000000 - 0000000000093400 (usable) user: 0000000000093400 - 00000000000a0000 (reserved) user: 0000000000100000 - 000000003fee0000 (usable) user: 000000003fee0000 - 000000003fef3000 (ACPI data) user: 000000003fef3000 - 000000003ff80000 (ACPI NVS) user: 000000003ff80000 - 0000000040000000 (reserved) user: 00000000e0000000 - 00000000f0000000 (reserved) user: 00000000fec00000 - 00000000fec10000 (reserved) user: 00000000fee00000 - 00000000fee01000 (reserved) user: 00000000ff000000 - 0000000100000000 (reserved) But should have resulted in: user-defined physical RAM map: user: 0000000000000000 - 00000000000a0000 (usable) user: 0000000001000000 - 000000000151b000 (usable) user: 00000000015bb000 - 0000000008ffc000 (usable) user: 000000003fee0000 - 000000003ff80000 (ACPI data) This is happening because of an improper usage of strcmp() in the e820 parsing code. The strcmp() always returns !0 and never resets the value for e820.nr_map and returns an incorrect user-defined map. This patch fixes the problem. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7bb..66e48aa2dd1b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p) if (!p) return -EINVAL; - if (!strcmp(p, "exactmap")) { + if (!strncmp(p, "exactmap", 8)) { #ifdef CONFIG_CRASH_DUMP /* * If we are doing a crash dump, we still need to know -- cgit v1.2.3 From 185f3b9da24c09c26b784591ed354fe57998a7b1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:35 -0700 Subject: x86: make intel.c have 64-bit support code prepare for unification. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 119 ++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a66989586a84..365a008080c2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,11 @@ #include #include +#ifdef CONFIG_X86_64 +#include +#include +#endif + #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC @@ -25,14 +30,20 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * @@ -73,6 +84,40 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) } + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif +#endif + +static void __cpuinit srat_detect_node(void) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + /* * find out the number of processor cores on the die */ @@ -91,20 +136,6 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -139,6 +170,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); @@ -176,18 +208,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); - detect_ht(c); - } - - /* Work around errata */ Intel_errata_workarounds(c); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -206,14 +226,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) movsl_mask.mask = 7; break; } +#endif + #endif if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); @@ -224,6 +242,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + if (cpu_has_bts) ptrace_bts_init_intel(c); @@ -240,8 +269,25 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif +#endif + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + + /* Work around errata */ + srat_detect_node(); } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* @@ -254,10 +300,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i size = 256; return size; } +#endif static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = { @@ -307,13 +355,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, - .c_size_cache = intel_size_cache, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); -/* arch_initcall(intel_cpu_init); */ - -- cgit v1.2.3 From 58602c1681bdfa1a0deaa5574b8a72d6e30c0e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:36 -0700 Subject: x86: make intel_64.c the same as intel.c No change in functionality intended - this only adds the 32-bit side. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_64.c | 301 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 284 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index aef4f2826313..365a008080c2 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -1,42 +1,108 @@ #include +#include + +#include +#include #include +#include +#include + #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_X86_64 #include #include +#endif #include "cpu.h" +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) + (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* - * find out the number of processor cores on the die + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - if (c->cpuid_level < 4) +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; + } + return 0; +} - cpuid_count(4, 0, &eax, &t, &t, &t); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); } +#endif +#endif static void __cpuinit srat_detect_node(void) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = hard_smp_processor_id(); @@ -52,11 +118,51 @@ static void __cpuinit srat_detect_node(void) #endif } +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + unsigned int l2 = 0; + char *p = NULL; + early_init_intel(c); - init_intel_cacheinfo(c); +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -64,8 +170,70 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * Names for the Pentium II/Celeron processors + * detectable only by also checking the cache size. + * Dixon is NOT a Celeron. + */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if (p) + strcpy(c->x86_model_id, p); + + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + +#endif + + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { - unsigned int l1, l2; + unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -74,26 +242,125 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + + if (cpu_has_bts) + ptrace_bts_init_intel(c); + + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +#endif detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + /* Work around errata */ srat_detect_node(); } +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* + * Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} +#endif + static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); + -- cgit v1.2.3 From 879d792b66d633bbe466974f61d1acc9aa8c78eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:37 -0700 Subject: x86: let intel 64-bit use intel.c now that arch/x86/kernel/cpu/intel_64.c and arch/x86/kernel/cpu/intel.c are equal, drop arch/x86/kernel/cpu/intel_64.c and fix up the glue. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/intel_64.c | 366 ----------------------------------------- 2 files changed, 1 insertion(+), 368 deletions(-) delete mode 100644 arch/x86/kernel/cpu/intel_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 510d1bcb058a..7f0b45a5d788 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,8 +8,7 @@ obj-y += proc.o capflags.o powerflags.o common.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 365a008080c2..000000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null @@ -1,366 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_64 -#include -#include -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -#endif -} - -#ifdef CONFIG_X86_32 -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - - -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif -#endif - -static void __cpuinit srat_detect_node(void) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - - early_init_intel(c); - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * Names for the Pentium II/Celeron processors - * detectable only by also checking the cache size. - * Dixon is NOT a Celeron. - */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#endif - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); - - if (cpu_has_bts) - ptrace_bts_init_intel(c); - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif -#endif - - detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - - /* Work around errata */ - srat_detect_node(); -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* - * Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} -#endif - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_size_cache = intel_size_cache, -#endif - .c_early_init = early_init_intel, - .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, -}; - -cpu_dev_register(intel_cpu_dev); - -- cgit v1.2.3 From 4052704d92c3172ebc13cc0cc8df8ba2bd446a5c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:38 -0700 Subject: x86: intel.c put workaround for old cpus together consolidate the code some more. No change in functionality intended. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 192 ++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 94 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 365a008080c2..5f76bf139fda 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -63,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void) return 0; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + /* + * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until + * model 3 mask 3 + */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); if ((lo & (1<<9)) == 0) { @@ -81,23 +120,44 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } -} - + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); +#ifdef CONFIG_X86_INTEL_USERCOPY /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * Set up the preferred alignment for movsl bulk memory moves */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } #endif + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +} +#else +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +{ +} #endif static void __cpuinit srat_detect_node(void) @@ -139,28 +199,10 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; - char *p = NULL; early_init_intel(c); -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif + intel_workarounds(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { @@ -170,17 +212,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); + } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + char *p = NULL; + switch (c->x86_model) { case 5: if (c->x86_mask == 0) { @@ -203,51 +260,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Celeron (Coppermine)"; break; } - } - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; + if (p) + strcpy(c->x86_model_id, p); } -#endif - -#endif - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) @@ -256,19 +273,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ptrace_bts_init_intel(c); - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif #endif detect_extended_topology(c); -- cgit v1.2.3 From ec70cae8698632917f06110fdb70c6364281ecc6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:39 -0700 Subject: x86: centaur_64.c remove duplicated setting of CONSTANT_TSC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 0e5cf17a3c89..a1625f5a1e78 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -20,7 +20,6 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_REP_GOOD); } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -- cgit v1.2.3 From b2994ef0de252d41f1511e5f87704bedda12dabc Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:18:50 -0700 Subject: x86: signal_64.c: clean up signal_fault() clean up and make signal_fault() same as 32bit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 8fbdd23d5cc6..552a331313ff 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -473,12 +473,14 @@ void do_notify_resume(struct pt_regs *regs, void *unused, void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; + if (show_unhandled_signals && printk_ratelimit()) { - printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm, me->pid, where, frame, regs->ip, - regs->sp, regs->orig_ax); + printk(KERN_INFO + "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", + me->comm, me->pid, where, frame, + regs->ip, regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); - printk("\n"); + printk(KERN_CONT "\n"); } force_sig(SIGSEGV, me); -- cgit v1.2.3 From 0c40ed71736c20f447843b3c96b715bb9c4904d7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:21:17 -0700 Subject: x86: signal_64.c: arg for restore_i387_xstate() is void __user * restore_i387_xstate() is declared as: int restore_i387_xstate(void __user *buf); so, make the variable buf void __user *. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 552a331313ff..321da93f2fb7 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -94,7 +94,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; + err |= __get_user(buf, &sc->fpstate); err |= restore_i387_xstate(buf); } -- cgit v1.2.3 From 764e8d128f9343027cf09afe8a145e8ff186e129 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:22:12 -0700 Subject: x86: signal_64.c: make handle_signal() similar Make handle_signal() same as 32bit. No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 57 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 321da93f2fb7..43e6862fc7a2 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -338,39 +338,40 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret == 0) { - /* - * This has nothing to do with segment registers, - * despite the name. This magic affects uaccess.h - * macros' behavior. Reset it to the normal setting. - */ - set_fs(USER_DS); + if (ret) + return ret; - /* - * Clear the direction flag as per the ABI for function entry. - */ - regs->flags &= ~X86_EFLAGS_DF; + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~X86_EFLAGS_TF; + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; - tracehook_signal_handler(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); - } + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(¤t->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); - return ret; + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + + return 0; } #define NR_restart_syscall \ -- cgit v1.2.3 From ac4ff656c07ada78316307b0c0ce8a8eb48aa6dd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:47 +0900 Subject: x86: convert gart to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0b99d4a06f74..1b0c412566e5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -214,24 +214,14 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - if (force_iommu) - mmu = 1; - - return mmu; + return force_iommu || + !is_buffer_dma_capable(*dev->dma_mask, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - return mmu; + return !is_buffer_dma_capable(*dev->dma_mask, addr, size); } /* Map a single continuous physical area into the IOMMU. -- cgit v1.2.3 From 49fbf4e9f982c704dc365698c5b5efa780aadcb5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:48 +0900 Subject: x86: convert pci-nommu to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 8e398b56f50b..1c1c98a31d57 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && bus + size > *hwdev->dma_mask) { + if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { if (*hwdev->dma_mask >= DMA_32BIT_MASK) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -79,6 +79,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, unsigned long dma_mask; int node; struct page *page; + dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(hwdev, gfp); @@ -90,14 +91,15 @@ again: if (!page) return NULL; - if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { free_pages((unsigned long)page_address(page), get_order(size)); gfp |= GFP_DMA; goto again; } - *dma_addr = page_to_phys(page); - if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + if (check_addr("alloc_coherent", hwdev, addr, size)) { + *dma_addr = addr; flush_write_buffers(); return page_address(page); } -- cgit v1.2.3 From e38e05a85828dac23540cd007df5f20985388afc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 10 Sep 2008 18:53:34 +0800 Subject: x86: extended "flags" to show virtualization HW feature in /proc/cpuinfo The hardware virtualization technology evolves very fast. But currently it's hard to tell if your CPU support a certain kind of HW technology without digging into the source code. The patch add a new catagory in "flags" under /proc/cpuinfo. Now "flags" can indicate the (important) HW virtulization features the CPU supported as well. Current implementation just cover Intel VMX side. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5f76bf139fda..99468dbd08da 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -196,6 +196,44 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + /* Intel VMX MSR indicated features */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + clear_cpu_cap(c, X86_FEATURE_VNMI); + clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + clear_cpu_cap(c, X86_FEATURE_EPT); + clear_cpu_cap(c, X86_FEATURE_VPID); + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -289,6 +327,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(); + + if (cpu_has(c, X86_FEATURE_VMX)) + detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From f461a1d80c865e5ec4d24107adbab8b010b60e32 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Sep 2008 13:57:43 +0200 Subject: arch/x86/kernel/kdebugfs.c: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Note that at the point of the change, node has not yet been stored in d, so it is not affected by the existing cleanup code. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc75514..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) if (PageHighMem(pg)) { data = ioremap_cache(pa_data, sizeof(*data)); if (!data) { + kfree(node); error = -ENXIO; goto err_dir; } -- cgit v1.2.3 From 0ad5bce7409d681a5655c66e64bb0eb740b89c1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 16:42:00 -0700 Subject: x86: fix possible x86_64 and EFI regression Russ Anderson reported a boot crash with EFI and latest mainline: BIOS-e820: 00000000fffa0000 - 00000000fffac000 (reserved) Pid: 0, comm: swapper Not tainted 2.6.27-rc5-00100-gec0c15a-dirty #5 Call Trace: [] early_idt_handler+0x55/0x69 [] __memcpy+0x12/0xa4 [] efi_init+0xce/0x932 [] setup_early_serial8250_console+0x2d/0x36a [] __insert_resource+0x18/0xc8 [] setup_arch+0x3a7/0x632 [] start_kernel+0x91/0x367 [] x86_64_start_kernel+0xe3/0xe7 [] x86_64_start_kernel+0x0/0xe7 RIP 0x10 Such a crash is possible if the CPU in this system is a 64-bit processor which doesn't support NX (ie, old Intel P4 -based64-bit processors). Certainly, if we support such processors, then we should start with _PAGE_NX initially clear in __supported_pte_flags, and then set it once we've established that the processor does indeed support NX. That will prevent early_ioremap - or anything else - from trying to set it. The simple fix is to simply call check_efer() earlier. Reported-by: Russ Anderson Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d38..9838f2539dfc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_X86_64 + check_efer(); +#endif + #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) /* * Must be before kernel pagetables are setup @@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - check_efer(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ -- cgit v1.2.3 From a0a29b62a9cac6b7d83b7514679f2ed8d33d4372 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 11 Sep 2008 23:27:52 +0200 Subject: x86, microcode rework, v2 this is a rework of the microcode splitup in tip/x86/microcode (1) I think this new interface is cleaner (look at the changes in 'struct microcode_ops' in microcode.h); (2) it's -64 lines of code; Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 84 ++++------ arch/x86/kernel/microcode_amd.c | 329 ++++++++++++++++++-------------------- arch/x86/kernel/microcode_intel.c | 220 +++++++++++-------------- 3 files changed, 285 insertions(+), 348 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index b2f84ce5eed3..902dada2eb6d 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -110,50 +110,28 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -void __user *user_buffer; /* user area microcode data buffer */ -EXPORT_SYMBOL_GPL(user_buffer); -unsigned int user_buffer_size; /* it's size */ -EXPORT_SYMBOL_GPL(user_buffer_size); - -static int do_microcode_update(void) +static int do_microcode_update(const void __user *buf, size_t size) { - long cursor = 0; + cpumask_t old; int error = 0; - void *new_mc = NULL; int cpu; - cpumask_t old; old = current->cpus_allowed; - while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { - if (microcode_ops->microcode_sanity_check != NULL) - error = microcode_ops->microcode_sanity_check(new_mc); - if (error) + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); + if (error < 0) goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->get_matching_microcode(new_mc, - cpu); - if (error < 0) - goto out; - if (error == 1) - microcode_ops->apply_microcode(cpu); - } - vfree(new_mc); + if (!error) + microcode_ops->apply_microcode(cpu); } out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; set_cpus_allowed_ptr(current, &old); return error; } @@ -178,10 +156,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, get_online_cpus(); mutex_lock(µcode_mutex); - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); + ret = do_microcode_update(buf, len); if (!ret) ret = (ssize_t)len; @@ -231,7 +206,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); /* fake device for request_firmware */ struct platform_device *microcode_pdev; -EXPORT_SYMBOL_GPL(microcode_pdev); static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, @@ -252,8 +226,12 @@ static ssize_t reload_store(struct sys_device *dev, if (cpu_online(cpu)) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); - if (uci->valid) - err = microcode_ops->cpu_request_microcode(cpu); + if (uci->valid) { + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + if (!err) + microcode_ops->apply_microcode(cpu); + } mutex_unlock(µcode_mutex); set_cpus_allowed_ptr(current, &old); } @@ -315,7 +293,7 @@ static void collect_cpu_info(int cpu) uci->valid = 1; } -static void microcode_resume_cpu(int cpu) +static int microcode_resume_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpu_signature nsig; @@ -323,7 +301,7 @@ static void microcode_resume_cpu(int cpu) pr_debug("microcode: CPU%d resumed\n", cpu); if (!uci->mc.valid_mc) - return; + return 1; /* * Let's verify that the 'cached' ucode does belong @@ -331,21 +309,22 @@ static void microcode_resume_cpu(int cpu) */ if (microcode_ops->collect_cpu_info(cpu, &nsig)) { microcode_fini_cpu(cpu); - return; + return -1; } if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { microcode_fini_cpu(cpu); /* Should we look for a new ucode here? */ - return; + return 1; } - microcode_ops->apply_microcode(cpu); + return 0; } void microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int err = 0; /* We should bind the task to the CPU */ BUG_ON(raw_smp_processor_id() != cpu); @@ -356,12 +335,17 @@ void microcode_update_cpu(int cpu) * otherwise just request a firmware: */ if (uci->valid) { - microcode_resume_cpu(cpu); + err = microcode_resume_cpu(cpu); } else { collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING) - microcode_ops->cpu_request_microcode(cpu); + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); } + + if (!err) + microcode_ops->apply_microcode(cpu); + mutex_unlock(µcode_mutex); } @@ -414,7 +398,7 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ - microcode_ops->apply_microcode(0); + microcode_update_cpu(0); return 0; } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index d606a05545ca..6815837a7753 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -59,7 +59,7 @@ MODULE_LICENSE("GPL v2"); /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -struct equiv_cpu_entry *equiv_cpu_table; +static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { @@ -83,36 +83,37 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) return 0; } -static int get_matching_microcode_amd(void *mc, int cpu) +static int get_matching_microcode(int cpu, void *mc, int rev) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_amd *mc_header = mc; - unsigned long total_size = get_totalsize(mc_header); - void *new_mc; struct pci_dev *nb_pci_dev, *sb_pci_dev; unsigned int current_cpu_id; unsigned int equiv_cpu_id = 0x00; unsigned int i = 0; - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - /* This is a tricky part. We might be called from a write operation */ - /* to the device file instead of the usual process of firmware */ - /* loading. This routine needs to be able to distinguish both */ -/* cases. This is done by checking if there alread is a equivalent */ - /* CPU table installed. If not, we're written through */ - /* /dev/cpu/microcode. */ -/* Since we ignore all checks. The error case in which going through */ -/* firmware loading and that table is not loaded has already been */ - /* checked earlier. */ + /* + * dimm: do we need this? Why an update via /dev/... is different + * from the one via firmware? + * + * This is a tricky part. We might be called from a write operation + * to the device file instead of the usual process of firmware + * loading. This routine needs to be able to distinguish both + * cases. This is done by checking if there alread is a equivalent + * CPU table installed. If not, we're written through + * /dev/cpu/microcode. + * Since we ignore all checks. The error case in which going through + * firmware loading and that table is not loaded has already been + * checked earlier. + */ + BUG_ON(equiv_cpu_table == NULL); +#if 0 if (equiv_cpu_table == NULL) { printk(KERN_INFO "microcode: CPU%d microcode update with " "version 0x%x (current=0x%x)\n", cpu, mc_header->patch_id, uci->cpu_sig.rev); goto out; } - +#endif current_cpu_id = cpuid_eax(0x00000001); while (equiv_cpu_table[i].installed_cpu != 0) { @@ -175,27 +176,9 @@ static int get_matching_microcode_amd(void *mc, int cpu) pci_dev_put(sb_pci_dev); } - if (mc_header->patch_id <= uci->cpu_sig.rev) + if (mc_header->patch_id <= rev) return 0; - printk(KERN_INFO "microcode: CPU%d found a matching microcode " - "update with version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->cpu_sig.rev); - -out: - new_mc = vmalloc(UCODE_MAX_SIZE); - if (!new_mc) { - printk(KERN_ERR "microcode: error, can't allocate memory\n"); - return -ENOMEM; - } - memset(new_mc, 0, UCODE_MAX_SIZE); - - /* free previous update file */ - vfree(uci->mc.mc_amd); - - memcpy(new_mc, mc, total_size); - - uci->mc.mc_amd = new_mc; return 1; } @@ -245,104 +228,65 @@ static void apply_microcode_amd(int cpu) uci->cpu_sig.rev = rev; } -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -extern void __user *user_buffer; /* user area microcode data buffer */ -extern unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode_amd(void **mc, long offset) -{ - struct microcode_header_amd mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(UCODE_MAX_SIZE); - if (!*mc) - return -ENOMEM; - memset(*mc, 0, UCODE_MAX_SIZE); - - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} -#else -#define get_next_ucode_amd() NULL -#endif - -static long get_next_ucode_from_buffer_amd(void **mc, void *buf, - unsigned long size, long offset) +static void * get_next_ucode(u8 *buf, unsigned int size, + int (*get_ucode_data)(void *, const void *, size_t), + unsigned int *mc_size) { - struct microcode_header_amd *mc_header; - unsigned long total_size; - unsigned char *buf_pos = buf; + unsigned int total_size; +#define UCODE_UNKNOWN_HDR 8 + u8 hdr[UCODE_UNKNOWN_HDR]; + void *mc; - /* No more data */ - if (offset >= size) - return 0; + if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR)) + return NULL; - if (buf_pos[offset] != UCODE_UCODE_TYPE) { + if (hdr[0] != UCODE_UCODE_TYPE) { printk(KERN_ERR "microcode: error! " "Wrong microcode payload type field\n"); - return -EINVAL; + return NULL; } - mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); + /* Why not by means of get_totalsize(hdr)? */ + total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); - total_size = (unsigned long) (buf_pos[offset+4] + - (buf_pos[offset+5] << 8)); + printk(KERN_INFO "microcode: size %u, total_size %u\n", + size, total_size); - printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", - size, total_size, offset); - - if (offset + total_size > size) { + if (total_size > size || total_size > UCODE_MAX_SIZE) { printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; + return NULL; } - *mc = vmalloc(UCODE_MAX_SIZE); - if (!*mc) { - printk(KERN_ERR "microcode: error! " - "Can not allocate memory for microcode patch\n"); - return -ENOMEM; + mc = vmalloc(UCODE_MAX_SIZE); + if (mc) { + memset(mc, 0, UCODE_MAX_SIZE); + if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) { + vfree(mc); + mc = NULL; + } else + *mc_size = total_size + UCODE_UNKNOWN_HDR; } - - memset(*mc, 0, UCODE_MAX_SIZE); - memcpy(*mc, buf + offset + 8, total_size); - - return offset + total_size + 8; +#undef UCODE_UNKNOWN_HDR + return mc; } -static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) + +static int install_equiv_cpu_table(u8 *buf, + int (*get_ucode_data)(void *, const void *, size_t)) { - unsigned int *buf_pos = buf; +#define UCODE_HEADER_SIZE 12 + u8 *hdr[UCODE_HEADER_SIZE]; + unsigned int *buf_pos = (unsigned int *)hdr; + unsigned long size; - /* No more data */ - if (offset >= size) + if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE)) return 0; - if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) { - printk(KERN_ERR "microcode: error! " - "Wrong microcode equivalnet cpu table type field\n"); - return 0; - } + size = buf_pos[2]; - if (size == 0) { + if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { printk(KERN_ERR "microcode: error! " - "Wrong microcode equivalnet cpu table length\n"); + "Wrong microcode equivalnet cpu table\n"); return 0; } @@ -352,79 +296,118 @@ static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) return 0; } - memset(equiv_cpu_table, 0, size); - memcpy(equiv_cpu_table, &buf_pos[3], size); + buf += UCODE_HEADER_SIZE; + if (get_ucode_data(equiv_cpu_table, buf, size)) { + vfree(equiv_cpu_table); + return 0; + } - return size + 12; /* add header length */ + return size + UCODE_HEADER_SIZE; /* add header length */ +#undef UCODE_HEADER_SIZE } -/* fake device for request_firmware */ -extern struct platform_device *microcode_pdev; - -static int cpu_request_microcode_amd(int cpu) +static void free_equiv_cpu_table(void) { - char name[30]; - const struct firmware *firmware; - void *buf; - unsigned int *buf_pos; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - sprintf(name, "amd-ucode/microcode_amd.bin"); - error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin", - µcode_pdev->dev); - if (error) { - printk(KERN_ERR "microcode: ucode data file %s load failed\n", - name); - return error; - } - - buf_pos = (unsigned int *)firmware->data; - buf = (void *)firmware->data; - size = firmware->size; - - if (buf_pos[0] != UCODE_MAGIC) { - printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n"); - return -EINVAL; + if (equiv_cpu_table) { + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; } +} - offset = install_equiv_cpu_table(buf, buf_pos[2], offset); +static int generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + u8 *ucode_ptr = data, *new_mc = NULL, *mc; + int new_rev = uci->cpu_sig.rev; + unsigned int leftover; + unsigned long offset; + offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); if (!offset) { printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); return -EINVAL; } - while ((offset = - get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) { - error = get_matching_microcode_amd(mc, cpu); - if (error < 0) + ucode_ptr += offset; + leftover = size - offset; + + while (leftover) { + unsigned int mc_size; + struct microcode_header_amd *mc_header; + + mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); + if (!mc) break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode_amd(cpu); - error = 0; - } - vfree(mc); + + mc_header = (struct microcode_header_amd *)mc; + if (get_matching_microcode(cpu, mc, new_rev)) { + new_rev = mc_header->patch_id; + new_mc = mc; + } else + vfree(mc); + + ucode_ptr += mc_size; + leftover -= mc_size; } - if (offset > 0) { - vfree(mc); - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; + + if (new_mc) { + if (!leftover) { + if (uci->mc.mc_amd) + vfree(uci->mc.mc_amd); + uci->mc.mc_amd = (struct microcode_amd *)new_mc; + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", + cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev); + } else + vfree(new_mc); } - if (offset < 0) - error = offset; + + free_equiv_cpu_table(); + + return (int)leftover; +} + +static int get_ucode_fw(void *to, const void *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} + +static int request_microcode_fw(int cpu, struct device *device) +{ + const char *fw_name = "amd-ucode/microcode_amd.bin"; + const struct firmware *firmware; + int ret; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + ret = request_firmware(&firmware, fw_name, device); + if (ret) { + printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); + return ret; + } + + ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, + &get_ucode_fw); + release_firmware(firmware); - return error; + return ret; +} + +static int get_ucode_user(void *to, const void *from, size_t n) +{ + return copy_from_user(to, from, n); +} + +static int request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); } static void microcode_fini_cpu_amd(int cpu) @@ -436,10 +419,8 @@ static void microcode_fini_cpu_amd(int cpu) } static struct microcode_ops microcode_amd_ops = { - .get_next_ucode = get_next_ucode_amd, - .get_matching_microcode = get_matching_microcode_amd, - .microcode_sanity_check = NULL, - .cpu_request_microcode = cpu_request_microcode_amd, + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, .microcode_fini_cpu = microcode_fini_cpu_amd, diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index c9b53202ba3d..f4930b55c6a0 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -155,15 +155,15 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static inline int microcode_update_match(int cpu_num, - struct microcode_header_intel *mc_header, int sig, int pf) +static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; +} - if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf) - || mc_header->rev <= uci->cpu_sig.rev) - return 0; - return 1; +static inline int +update_match_revision(struct microcode_header_intel *mc_header, int rev) +{ + return (mc_header->rev <= rev) ? 0 : 1; } static int microcode_sanity_check(void *mc) @@ -248,51 +248,36 @@ static int microcode_sanity_check(void *mc) /* * return 0 - no update found * return 1 - found update - * return < 0 - error */ -static int get_matching_microcode(void *mc, int cpu) +static int +get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; struct extended_signature *ext_sig; - void *new_mc; - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; + if (!update_match_revision(mc_header, rev)) + return 0; + + if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) + return 1; + /* Look for ext. headers: */ if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) return 0; ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; ext_sigcount = ext_header->count; ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; + if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) + return 1; ext_sig++; } return 0; -find: - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, mc_header->rev, uci->cpu_sig.rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc.mc_intel); - - memcpy(new_mc, mc, total_size); - uci->mc.mc_intel = new_mc; - return 1; } static void apply_microcode(int cpu) @@ -300,7 +285,7 @@ static void apply_microcode(int cpu) unsigned long flags; unsigned int val[2]; int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); @@ -338,116 +323,105 @@ static void apply_microcode(int cpu) uci->cpu_sig.rev = val[1]; } -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -extern void __user *user_buffer; /* user area microcode data buffer */ -extern unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) +static int generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) { - struct microcode_header_intel mc_header; - unsigned long total_size; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + u8 *ucode_ptr = data, *new_mc = NULL, *mc; + int new_rev = uci->cpu_sig.rev; + unsigned int leftover = size; - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} -#endif + while (leftover) { + struct microcode_header_intel mc_header; + unsigned int mc_size; -static long get_next_ucode_from_buffer(void **mc, const u8 *buf, - unsigned long size, long offset) -{ - struct microcode_header_intel *mc_header; - unsigned long total_size; + if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) + break; - /* No more data */ - if (offset >= size) - return 0; - mc_header = (struct microcode_header_intel *)(buf + offset); - total_size = get_totalsize(mc_header); + mc_size = get_totalsize(&mc_header); + if (!mc_size || mc_size > leftover) { + printk(KERN_ERR "microcode: error!" + "Bad data in microcode data file\n"); + break; + } - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; + mc = vmalloc(mc_size); + if (!mc) + break; + + if (get_ucode_data(mc, ucode_ptr, mc_size) || + microcode_sanity_check(mc) < 0) { + vfree(mc); + break; + } + + if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + new_rev = mc_header.rev; + new_mc = mc; + } else + vfree(mc); + + ucode_ptr += mc_size; + leftover -= mc_size; } - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; + if (new_mc) { + if (!leftover) { + if (uci->mc.mc_intel) + vfree(uci->mc.mc_intel); + uci->mc.mc_intel = (struct microcode_intel *)new_mc; + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", + cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev); + } else + vfree(new_mc); } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; + + return (int)leftover; } -/* fake device for request_firmware */ -extern struct platform_device *microcode_pdev; +static int get_ucode_fw(void *to, const void *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} -static int cpu_request_microcode(int cpu) +static int request_microcode_fw(int cpu, struct device *device) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; - const u8 *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; + int ret; /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { + ret = request_firmware(&firmware, name, device); + if (ret) { pr_debug("microcode: data file %s load failed\n", name); - return error; - } - buf = firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_matching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); + return ret; } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; + + ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, + &get_ucode_fw); + release_firmware(firmware); - return error; + return ret; +} + +static int get_ucode_user(void *to, const void *from, size_t n) +{ + return copy_from_user(to, from, n); +} + +static int request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) @@ -459,10 +433,8 @@ static void microcode_fini_cpu(int cpu) } static struct microcode_ops microcode_intel_ops = { - .get_next_ucode = get_next_ucode, - .get_matching_microcode = get_matching_microcode, - .microcode_sanity_check = microcode_sanity_check, - .cpu_request_microcode = cpu_request_microcode, + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, .microcode_fini_cpu = microcode_fini_cpu, -- cgit v1.2.3 From aef93c8bd506a78983d91cd576a97fee6e934ac1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:15 -0700 Subject: x86: identify_cpu_without_cpuid v2 Krzysztof found some old cyrix cpu where an mtrr-alike cpu feature was not detected properly. this one is based on Krzysztof' patch, and we call ->c_identify() in early_identify_cpu. need to call c_identify() for cpus without cpuid even earlier ... v2: Krzysztof point out need to give cyrix another chance about cpuid checking again, after ->c_identify() enables cpuid for it Signed-off-by: Yinghai Lu Cc: Krzysztof Helt Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9a57106c1995..f5f25b85be95 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -485,6 +485,33 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = cpuid_edx(0x80000007); } + +static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + int i; + + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + + for (i = 0; i < X86_VENDOR_NUM; i++) + if (cpu_devs[i] && cpu_devs[i]->c_identify) { + c->x86_vendor_id[0] = 0; + cpu_devs[i]->c_identify(c); + if (c->x86_vendor_id[0]) { + get_cpu_vendor(c); + break; + } + } +#endif +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -503,13 +530,16 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - if (!have_cpuid_p()) - return; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; + cpu_detect(c); get_cpu_vendor(c); @@ -583,10 +613,14 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + c->extended_cpuid_level = 0; + if (!have_cpuid_p()) - return; + identify_cpu_without_cpuid(c); - c->extended_cpuid_level = 0; + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; cpu_detect(c); @@ -639,17 +673,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - generic_identify(c); if (this_cpu->c_identify) -- cgit v1.2.3 From afae865613c8292e8bdcce4f0b161988d761f033 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:16 -0700 Subject: x86: move transmeta cap read to early_init_transmeta() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 -------- arch/x86/kernel/cpu/transmeta.c | 28 +++++++++++++++------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f5f25b85be95..3e2ce4d33d3a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -465,14 +465,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 738e03244f95..52b3fefbd5af 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -5,6 +5,18 @@ #include #include "cpu.h" +static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; @@ -12,6 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; + early_init_transmeta(c); + display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -84,23 +98,11 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_early_init = early_init_transmeta, .c_init = init_transmeta, - .c_identify = transmeta_identify, .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -- cgit v1.2.3 From a9853dd6d285c30a3ddeb3cce8c05e1678400bef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:46:58 +0200 Subject: x86: cpuid, fix typo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e2ce4d33d3a..cef3519b3bbb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -529,7 +529,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); @@ -611,7 +611,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); -- cgit v1.2.3 From a1c75cc5018f17ff6d80ce45a13435b1536f76db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:50:26 +0200 Subject: x86, microcode rework, v2, fix based on patch from Dmitry Adamushko. - add missing vfree() - update debug printks Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 4 +--- arch/x86/kernel/microcode_amd.c | 6 ++++-- arch/x86/kernel/microcode_intel.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 902dada2eb6d..0c2634f4fd7c 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -70,8 +70,6 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ - -/* #define DEBUG pr_debug */ #include #include #include @@ -396,7 +394,7 @@ static int mc_sysdev_resume(struct sys_device *dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ microcode_update_cpu(0); return 0; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 6815837a7753..48aec9f48e4f 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -92,7 +92,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev) unsigned int i = 0; /* - * dimm: do we need this? Why an update via /dev/... is different + * FIXME! dimm: do we need this? Why an update via /dev/... is different * from the one via firmware? * * This is a tricky part. We might be called from a write operation @@ -246,7 +246,7 @@ static void * get_next_ucode(u8 *buf, unsigned int size, return NULL; } - /* Why not by means of get_totalsize(hdr)? */ + /* FIXME! dimm: Why not by means of get_totalsize(hdr)? */ total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); printk(KERN_INFO "microcode: size %u, total_size %u\n", @@ -342,6 +342,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, mc_header = (struct microcode_header_amd *)mc; if (get_matching_microcode(cpu, mc, new_rev)) { + if (new_mc) + vfree(new_mc); new_rev = mc_header->patch_id; new_mc = mc; } else diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index f4930b55c6a0..48ed3cef58c1 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,8 +70,6 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ - -/* #define DEBUG */ /* pr_debug */ #include #include #include @@ -356,6 +354,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, } if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + if (new_mc) + vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; } else -- cgit v1.2.3 From 3d0aedd9538e6be8afec1a9d8b084bf90bc91495 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:01:09 -0700 Subject: x86: signal: put give_sigsegv of setup frames together When setup frame fails, force_sigsegv is called and returns -EFAULT. There is similar code in ia32_setup_frame(), ia32_setup_rt_frame(), __setup_frame() and __setup_rt_frame(). Make them identical. No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 31 ++++++++++++++----------------- arch/x86/kernel/signal_64.c | 17 +++++++++-------- 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b668efc18eae..1c22e0067fe7 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -349,21 +349,21 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; err = __put_user(sig, &frame->sig); if (err) - goto give_sigsegv; + return -EFAULT; err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) - goto give_sigsegv; + return -EFAULT; if (_NSIG_WORDS > 1) { err = __copy_to_user(&frame->extramask, &set->sig[1], sizeof(frame->extramask)); if (err) - goto give_sigsegv; + return -EFAULT; } if (current->mm->context.vdso) @@ -388,7 +388,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; @@ -403,10 +403,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -420,14 +416,14 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, info); if (err) - goto give_sigsegv; + return -EFAULT; /* Create the ucontext. */ if (cpu_has_xsave) @@ -443,7 +439,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -463,7 +459,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; @@ -478,10 +474,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } /* @@ -506,6 +498,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, else ret = __setup_frame(usig, ka, set, regs); + if (ret) { + force_sigsegv(sig, current); + return -EFAULT; + } + return ret; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 43e6862fc7a2..3b79e179ba3a 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -215,12 +215,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) - goto give_sigsegv; + return -EFAULT; } /* Create the ucontext. */ @@ -248,11 +248,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + return -EFAULT; } if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->di = sig; @@ -272,10 +272,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } /* @@ -297,6 +293,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, #endif ret = __setup_rt_frame(sig, ka, info, set, regs); + if (ret) { + force_sigsegv(sig, current); + return -EFAULT; + } + return ret; } -- cgit v1.2.3 From 2ba48e16e78216bb5b9fd08a088bfefda478df25 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:02:53 -0700 Subject: x86: signal: remove unneeded err handling This patch eliminates unused or unneeded variable handling. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 11 ++++------- arch/x86/kernel/signal_64.c | 5 ++--- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 1c22e0067fe7..d433861a6599 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -351,18 +351,15 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err = __put_user(sig, &frame->sig); - if (err) + if (__put_user(sig, &frame->sig)) return -EFAULT; - err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); - if (err) + if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0])) return -EFAULT; if (_NSIG_WORDS > 1) { - err = __copy_to_user(&frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - if (err) + if (__copy_to_user(&frame->extramask, &set->sig[1], + sizeof(frame->extramask))) return -EFAULT; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3b79e179ba3a..a21c85197295 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -210,7 +210,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (save_i387_xstate(fp) < 0) - err |= -1; + return -EFAULT; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; @@ -218,8 +218,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; if (ka->sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, info); - if (err) + if (copy_siginfo_to_user(&frame->info, info)) return -EFAULT; } -- cgit v1.2.3 From e6babb6b7fed93c93f8fc5ef8ebd3a474fc2df3e Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:03:31 -0700 Subject: x86: signal: introduce do_rt_sigreturn() introduce do_rt_sigreturn(), to collect common part of sys_rt_sigreturn(). No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 12 +++++++++--- arch/x86/kernel/signal_64.c | 11 ++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d433861a6599..da3cf3270f83 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -215,9 +215,8 @@ badframe: return 0; } -asmlinkage int sys_rt_sigreturn(unsigned long __unused) +static long do_rt_sigreturn(struct pt_regs *regs) { - struct pt_regs *regs = (struct pt_regs *)&__unused; struct rt_sigframe __user *frame; unsigned long ax; sigset_t set; @@ -243,10 +242,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) return ax; badframe: - signal_fault(regs, frame, "rt sigreturn"); + signal_fault(regs, frame, "rt_sigreturn"); return 0; } +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *)&__unused; + + return do_rt_sigreturn(regs); +} + /* * Set up a signal frame. */ diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index a21c85197295..bf77d4789a2d 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -104,11 +104,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, return err; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +static long do_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; - sigset_t set; unsigned long ax; + sigset_t set; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -131,10 +131,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs, frame, "sigreturn"); + signal_fault(regs, frame, "rt_sigreturn"); return 0; } +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + return do_rt_sigreturn(regs); +} + /* * Set up a signal frame. */ -- cgit v1.2.3 From bee44f294efd8417f5e68553778a6cc957af1547 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:35 +0900 Subject: x86: make GART to respect device's dma_mask about virtual mappings Currently, GART IOMMU ingores device's dma_mask when it does virtual mappings. So it could give a device a virtual address that the device can't access to. This patch fixes the above problem. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1b0c412566e5..9972c42ac925 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,23 +83,34 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) + unsigned long align_mask, u64 dma_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; + unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; + limit = iommu_device_max_index(iommu_pages, + DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), + dma_mask >> PAGE_SHIFT); + spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + + if (limit <= next_bit) { + need_flush = 1; + next_bit = 0; + } + + offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1) { + if (offset == -1 && next_bit) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, size, base_index, boundary_size, align_mask); } @@ -228,12 +239,14 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) + size_t size, int dir, unsigned long align_mask, + u64 dma_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); + unsigned long iommu_page; int i; + iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -263,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0); + bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); flush_gart(); return bus; @@ -314,6 +327,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; + u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -323,7 +337,8 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); + addr = dma_map_area(dev, addr, s->length, dir, 0, + dma_mask); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -345,14 +360,16 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; + unsigned long iommu_start; + unsigned long iommu_page; struct scatterlist *s; int i; + iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; + iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -497,7 +514,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, - align_mask); + align_mask, dma_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From f10ac8a232496bf9271cfc67c6eea432891f04a6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:47 +0900 Subject: x86: avoid unnecessary low zone allocation in Calgary's alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but Calgary doesn't need it because of virtual mappings. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 8415d92853c4..fe7695e4caae 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, npages = size >> PAGE_SHIFT; order = get_order(size); + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + /* alloc enough pages (and possibly more) */ ret = (void *)__get_free_pages(flag, order); if (!ret) -- cgit v1.2.3 From f6a32a36ab96016675cd414802904feb288d7899 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:48 +0900 Subject: x86: gart alloc_coherent does virtual mapppings only when necessary gart alloc_coherent need to do virtual mapppings only when an allocated buffer is not DMA-capable for a device. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9972c42ac925..9739d5682093 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -505,15 +505,23 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + dma_addr_t paddr; unsigned long align_mask; + u64 dma_mask = dma_alloc_coherent_mask(dev, flag); vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; + paddr = virt_to_phys(vaddr); + if (is_buffer_dma_capable(dma_mask, paddr, size)) { + *dma_addr = paddr; + return vaddr; + } + align_mask = (1UL << get_order(size)) - 1; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, align_mask, dma_mask); flush_gart(); -- cgit v1.2.3 From 5649b7c30316a51792808422ac03ee825d26aa5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 09:29:09 +0200 Subject: x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume Alan Jenkins and Andy Wettstein reported a suspend/resume memory corruption bug and extensively documented it here: http://bugzilla.kernel.org/show_bug.cgi?id=11237 The bug is that the BIOS overwrites 1K of memory at 0xc000 physical, without registering it in e820 as reserved or giving the kernel any idea about this. Detect AMI BIOSen and reserve that 1K. We paint this bug around with a very broad brush (reserving that 1K on all AMI BIOS systems), as the bug was extremely hard to find and needed several weeks and lots of debugging and patching. The bug was found via the CONFIG_X86_CHECK_BIOS_CORRUPTION=y debug feature, if similar bugs are suspected then this feature can be enabled on other systems as well to scan low memory for corrupted memory. Reported-by: Alan Jenkins Reported-by: Andy Wettstein Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ec7e56c1b984..3109ca37a67c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -729,6 +729,29 @@ void start_periodic_check_for_corruption(void) } #endif +static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE + "%s detected: BIOS corrupts 0xc000, working it around.\n", + d->ident); + + reserve_early(0xc000, 0xc400, "BIOS quirk"); + + return 0; +} + +/* List of systems that have known low memory corruption BIOS problems */ +static struct dmi_system_id __initdata bad_bios_dmi_table[] = { + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + }, + {} +}; + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -752,6 +775,8 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + dmi_check_system(bad_bios_dmi_table); + early_cpu_init(); early_ioremap_init(); @@ -1037,3 +1062,5 @@ void __init setup_arch(char **cmdline_p) #endif #endif } + + -- cgit v1.2.3 From 1e22436eba84edfec9c25e5a25d09062c4f91ca9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 09:58:02 +0200 Subject: x86: reserve low 64K on AMI and Phoenix BIOS boxen there's multiple reports about suspend/resume related low memory corruption in this bugzilla: http://bugzilla.kernel.org/show_bug.cgi?id=11237 the common pattern is that the corruption is caused by the BIOS, and that it affects some portion of the first 64K of physical RAM. So add a DMI quirk This will waste 64K RAM on 'good' systems too, but without knowing the exact nature of this BIOS memory corruption this is the safest approach. This might as well solve a wide range of suspend/resume breakages under Linux. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3109ca37a67c..33719544a224 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -732,10 +732,10 @@ void start_periodic_check_for_corruption(void) static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS corrupts 0xc000, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early(0xc000, 0xc400, "BIOS quirk"); + reserve_early(0x0, 0x10000, "BIOS quirk"); return 0; } @@ -749,6 +749,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, {} }; -- cgit v1.2.3 From fc38151947477596aa27df6c4306ad6008dc6711 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 10:07:34 +0200 Subject: x86: add X86_RESERVE_LOW_64K This bugzilla: http://bugzilla.kernel.org/show_bug.cgi?id=11237 Documents a wide range of systems where the BIOS utilizes the first 64K of physical memory during suspend/resume and other hardware events. Currently we reserve this memory on all AMI and Phoenix BIOS systems. Life is too short to hunt subtle memory corruption problems like this, so we try to be robust by default. Still, allow this to be overriden: allow users who want that first 64K of memory to be available to the kernel disable the quirk, via CONFIG_X86_RESERVE_LOW_64K=n. Also, allow the early reservation to overlap with other early reservations. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 33719544a224..786c1886ae53 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -735,13 +735,14 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early(0x0, 0x10000, "BIOS quirk"); + reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); return 0; } /* List of systems that have known low memory corruption BIOS problems */ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { +#ifdef CONFIG_X86_RESERVE_LOW_64K { .callback = dmi_low_memory_corruption, .ident = "AMI BIOS", @@ -757,6 +758,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { }, }, {} +#endif }; /* -- cgit v1.2.3 From ba0593bf553c450a03dbc5f8c1f0ff58b778a0c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 16 Sep 2008 09:29:40 -0700 Subject: x86: completely disable NOPL on 32 bits Completely disable NOPL on 32 bits. It turns out that Microsoft Virtual PC is so broken it can't even reliably *fail* in the presence of NOPL. This leaves the infrastructure in place but disables it unconditionally. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642e..4e456bd955bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -344,31 +344,15 @@ static void __init early_cpu_detect(void) /* * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because + * family >= 6; unfortunately, that's not true in practice because * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. + * are not easy to detect. In the latter case it doesn't even *fail* + * reliably, so probing for it doesn't even work. Disable it completely + * unless we can find a reliable way to detect all the broken cases. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } } static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 90f7d25c6b672137344f447a30a9159945ffea72 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 16 Sep 2008 11:27:30 -0700 Subject: x86: print DMI information in the oops trace in order to diagnose hard system specific issues, it's useful to have the system name in the oops (as provided by DMI) Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..7b9ee9f09639 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -160,6 +161,7 @@ void __show_registers(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; + const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -172,11 +174,15 @@ void __show_registers(struct pt_regs *regs, int all) } printk("\n"); - printk("Pid: %d, comm: %s %s (%s %.*s)\n", + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, -- cgit v1.2.3 From ee2fa7435b6dddf1ca119f298ad0100cf50c0397 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 13:47:25 +0200 Subject: AMD IOMMU: set iommu sunc flag after command queuing The iommu->need_sync flag must be set after the command is queued to avoid race conditions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 69b4d060b21c..a96d8c049a88 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -140,6 +140,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { struct iommu_cmd cmd; + int ret; BUG_ON(iommu == NULL); @@ -147,9 +148,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); cmd.data[0] = devid; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* @@ -159,6 +162,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { struct iommu_cmd cmd; + int ret; memset(&cmd, 0, sizeof(cmd)); address &= PAGE_MASK; @@ -171,9 +175,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* -- cgit v1.2.3 From 7e4f88da7bf1887563f70bd5edbbd0479e31dc12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 14:19:15 +0200 Subject: AMD IOMMU: protect completion wait loop with iommu lock The unlocked polling of the ComWaitInt bit in the IOMMU completion wait path is racy. Protect it with the iommu lock. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a96d8c049a88..042fdc27bc92 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret, ready = 0; + int ret = 0, ready = 0; unsigned status = 0; struct iommu_cmd cmd; - unsigned long i = 0; + unsigned long flags, i = 0; memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; @@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu) iommu->need_sync = 0; - ret = iommu_queue_command(iommu, &cmd); + spin_lock_irqsave(&iommu->lock, flags); + + ret = __iommu_queue_command(iommu, &cmd); if (ret) - return ret; + goto out; while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; @@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); +out: + spin_unlock_irqrestore(&iommu->lock, flags); return 0; } -- cgit v1.2.3 From 279b0bbba2bb647348ad90e183b3960aa99eccfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 18 Sep 2008 23:55:27 -0700 Subject: x86: fix arch/x86/kernel/cpu/mtrr/main.c warning fix this warning reported by Andrew Morton: > arch/x86/kernel/cpu/mtrr/main.c: In function 'mtrr_bp_init': > arch/x86/kernel/cpu/mtrr/main.c:1170: warning: 'extra_remove_base' may be used uninitialized in this function the warning is bogus but the logic that prevents uninitialized use is a bit convoluted so simplify it all. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a564..8a7c79234be6 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1218,11 +1218,10 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(range, 0, sizeof(range)); extra_remove_size = 0; - if (mtrr_tom2) { - extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_base = 1 << (32 - PAGE_SHIFT); + if (mtrr_tom2) extra_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); range_sums = sum_ranges(range, nr_range); -- cgit v1.2.3 From dbcc112e3b5367e81a845b082933506b0ff1d1e2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:04:26 +0200 Subject: AMD IOMMU: check for invalid device pointers Currently AMD IOMMU code triggers a BUG_ON if NULL is passed as the device. This is inconsistent with other IOMMU implementations. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 01c68c38840d..695e0fc41b10 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -645,6 +645,18 @@ static void set_device_domain(struct amd_iommu *iommu, * *****************************************************************************/ +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + if (!dev || !dev->dma_mask) + return false; + + return true; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -661,18 +673,19 @@ static int get_device_resources(struct device *dev, struct pci_dev *pcidev; u16 _bdf; - BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + + if (dev->bus != &pci_bus_type) + return 0; pcidev = to_pci_dev(dev); _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) { - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; + if (_bdf > amd_iommu_last_bdf) return 0; - } *bdf = amd_iommu_alias_table[_bdf]; @@ -826,6 +839,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, u16 devid; dma_addr_t addr; + if (!check_device(dev)) + return bad_dma_address; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -860,7 +876,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, struct protection_domain *domain; u16 devid; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -910,6 +927,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, phys_addr_t paddr; int mapped_elems = 0; + if (!check_device(dev)) + return 0; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -967,7 +987,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, u16 devid; int i; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) return; spin_lock_irqsave(&domain->lock, flags); @@ -999,6 +1020,9 @@ static void *alloc_coherent(struct device *dev, size_t size, u16 devid; phys_addr_t paddr; + if (!check_device(dev)) + return NULL; + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1047,6 +1071,9 @@ static void free_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; + if (!check_device(dev)) + return; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) -- cgit v1.2.3 From 270cab2426cdc6307725e4f1f46ecf8ab8e69193 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:49:46 +0200 Subject: AMD IOMMU: move TLB flushing to the map/unmap helper functions This patch moves the invocation of the flushing functions to the map/unmap helpers because its common code in all dma_ops relevant mapping/unmapping code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 695e0fc41b10..691e023695ad 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -795,6 +795,9 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; + if (unlikely(iommu_has_npcache(iommu))) + iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + out: return address; } @@ -825,6 +828,8 @@ static void __unmap_single(struct amd_iommu *iommu, } dma_ops_free_addresses(dma_dom, dma_addr, pages); + + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* @@ -853,9 +858,6 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -885,8 +887,6 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - iommu_flush_pages(iommu, domain->id, dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -948,9 +948,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, mapped_elems++; } else goto unmap; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); } if (iommu->need_sync) @@ -996,8 +993,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, s, nelems, i) { __unmap_single(iommu, domain->priv, s->dma_address, s->dma_length, dir); - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); s->dma_address = s->dma_length = 0; } @@ -1048,9 +1043,6 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, *dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -1082,7 +1074,6 @@ static void free_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_flush_pages(iommu, domain->id, dma_addr, size); if (iommu->need_sync) iommu_completion_wait(iommu); -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 13 +++++++++++++ arch/x86/kernel/pci-gart_64.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc62..d2f2c0158dc1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,6 +16,15 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -171,6 +180,10 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9739d5682093..508ef470b27f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,15 +45,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush = 1; - /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -901,10 +892,6 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From 1c65577398589bb44ab0980f9b9d30804b48a5db Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 18:40:05 +0200 Subject: AMD IOMMU: implement lazy IO/TLB flushing The IO/TLB flushing on every unmaping operation is the most expensive part in AMD IOMMU code and not strictly necessary. It is sufficient to do the flush before any entries are reused. This is patch implements lazy IO/TLB flushing which does exactly this. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/amd_iommu_init.c | 7 ++++++- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 691e023695ad..679f2a8e22ee 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -203,6 +203,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, return 0; } +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +{ + u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -386,14 +394,18 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, PAGE_SIZE) >> PAGE_SHIFT; limit = limit < size ? limit : size; - if (dom->next_bit >= limit) + if (dom->next_bit >= limit) { dom->next_bit = 0; + dom->need_flush = true; + } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, 0 , boundary_size, 0); - if (address == -1) + if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 0, boundary_size, 0); + dom->need_flush = true; + } if (likely(address != -1)) { dom->next_bit = address + pages; @@ -553,6 +565,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->bitmap[0] = 1; dma_dom->next_bit = 0; + dma_dom->need_flush = false; + /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { @@ -795,7 +809,10 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(iommu_has_npcache(iommu))) + if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + iommu_flush_tlb(iommu, dma_dom->domain.id); + dma_dom->need_flush = false; + } else if (unlikely(iommu_has_npcache(iommu))) iommu_flush_pages(iommu, dma_dom->domain.id, address, size); out: @@ -829,7 +846,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + if (iommu_fullflush) + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a69cc0f52042..f2fa8dc81beb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -995,6 +995,11 @@ int __init amd_iommu_init(void) else printk("disabled\n"); + if (iommu_fullflush) + printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); + else + printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); + out: return ret; @@ -1057,7 +1062,7 @@ void __init amd_iommu_detect(void) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strcmp(str, "isolate") == 0) + if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; } -- cgit v1.2.3 From 5507eef835c9c941e69d6d96e4b43af23eeb4ac9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:01:02 +0200 Subject: AMD IOMMU: add branch hints to completion wait checks This patch adds branch hints to the cecks if a completion_wait is necessary. The completion_waits in the mapping paths are unlikly because they will only happen on software implementations of AMD IOMMU which don't exists today or with lazy IO/TLB flushing when the allocator wraps around the address space. With lazy IO/TLB flushing the completion_wait in the unmapping path is unlikely too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 679f2a8e22ee..d743aa0adccc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -876,7 +876,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -905,7 +905,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -968,7 +968,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1014,7 +1014,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -1061,7 +1061,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1093,7 +1093,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 6d4f343f84993eb0d5864c0823dc9babd171a33a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:18:02 +0200 Subject: AMD IOMMU: align alloc_coherent addresses properly The API definition for dma_alloc_coherent states that the bus address has to be aligned to the next power of 2 boundary greater than the allocation size. This is violated by AMD IOMMU so far and this patch fixes it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d743aa0adccc..15792ed082e0 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -383,7 +383,8 @@ static unsigned long dma_mask_to_pages(unsigned long mask) */ static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, - unsigned int pages) + unsigned int pages, + unsigned long align_mask) { unsigned long limit = dma_mask_to_pages(*dev->dma_mask); unsigned long address; @@ -400,10 +401,10 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, - 0 , boundary_size, 0); + 0 , boundary_size, align_mask); if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, - 0, boundary_size, 0); + 0, boundary_size, align_mask); dom->need_flush = true; } @@ -787,17 +788,22 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int dir) + int dir, + bool align) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; unsigned int pages; + unsigned long align_mask = 0; int i; pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; - address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (align) + align_mask = (1UL << get_order(size)) - 1; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -872,7 +878,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); if (addr == bad_dma_address) goto out; @@ -959,7 +965,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir); + paddr, s->length, dir, false); if (s->dma_address) { s->dma_length = s->length; @@ -1053,7 +1059,7 @@ static void *alloc_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL); + size, DMA_BIDIRECTIONAL, true); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From 335503e57b6b8de04cec5d27eb2c3d09ff98905b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2008 14:29:07 +0200 Subject: AMD IOMMU: add event buffer allocation This patch adds the allocation of a event buffer for each AMD IOMMU in the system. The hardware will log events like device page faults or other errors to this buffer once this is enabled. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index f2fa8dc81beb..41ce8d5d626e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -417,6 +417,30 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); + + if (iommu->evt_buf == NULL) + return NULL; + + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu->evt_buf_size = EVT_BUFFER_SIZE; + + return iommu->evt_buf; +} + +static void __init free_event_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); +} + /* sets a specific bit in the device table entry. */ static void set_dev_entry_bit(u16 devid, u8 bit) { @@ -622,6 +646,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); + free_event_buffer(iommu); iommu_unmap_mmio_space(iommu); } @@ -661,6 +686,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->cmd_buf) return -ENOMEM; + iommu->evt_buf = alloc_event_buffer(iommu); + if (!iommu->evt_buf) + return -ENOMEM; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); -- cgit v1.2.3 From ee893c24edb8ebab9a3fb66566855572579ad616 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 14:48:04 +0200 Subject: AMD IOMMU: save pci segment from ACPI tables This patch adds the pci_seg field to the amd_iommu structure and fills it with the corresponding value from the ACPI table. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 41ce8d5d626e..b50234ef91ed 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -676,6 +676,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; + iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); if (!iommu->mmio_base) -- cgit v1.2.3 From 3eaf28a1cd2686aaa185b54d5a5e18e91b41f7f2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 15:55:10 +0200 Subject: AMD IOMMU: save pci_dev instead of devid We need the pci_dev later anyways to enable MSI for the IOMMU hardware. So remove the devid pointing to the BDF and replace it with the pci_dev structure where the IOMMU is implemented. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b50234ef91ed..a7eb89d8923d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -242,9 +242,12 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); - print_devid(iommu->devid, 0); - printk(" cap 0x%hx\n", iommu->cap_ptr); + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " + "at %02x:%02x.%x cap 0x%hx\n", + iommu->dev->bus->number, + PCI_SLOT(iommu->dev->devfn), + PCI_FUNC(iommu->dev->devfn), + iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -511,15 +514,14 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) */ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { - int bus = PCI_BUS(iommu->devid); - int dev = PCI_SLOT(iommu->devid); - int fn = PCI_FUNC(iommu->devid); int cap_ptr = iommu->cap_ptr; u32 range; - iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); - range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), @@ -674,7 +676,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* * Copy data from ACPI table entry to the iommu struct */ - iommu->devid = h->devid; + iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); + if (!iommu->dev) + return 1; + iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; @@ -695,6 +700,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + pci_enable_device(iommu->dev); + return 0; } -- cgit v1.2.3 From a80dc3e0e0dc8393158de317d66ae0f345dc58f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 16:51:41 +0200 Subject: AMD IOMMU: add MSI interrupt support The AMD IOMMU can generate interrupts for various reasons. This patch adds the basic interrupt enabling infrastructure to the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 11 +++++ arch/x86/kernel/amd_iommu_init.c | 99 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 15792ed082e0..0e494b9d5f20 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -49,6 +49,17 @@ static int iommu_has_npcache(struct amd_iommu *iommu) return iommu->cap & IOMMU_CAP_NPCACHE; } +/**************************************************************************** + * + * Interrupt handling functions + * + ****************************************************************************/ + +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_NONE; +} + /**************************************************************************** * * IOMMU command queuing functions diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a7eb89d8923d..14a06464a694 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -515,17 +517,20 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range; + u32 range, misc; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + iommu->evt_msi_num = MMIO_MSI_NUM(misc); } /* @@ -696,6 +701,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->evt_buf) return -ENOMEM; + iommu->int_enabled = false; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); @@ -741,6 +748,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The following functions initialize the MSI interrupts for all IOMMUs + * in the system. Its a bit challenging because there could be multiple + * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per + * pci_dev. + * + ****************************************************************************/ + +static int __init iommu_setup_msix(struct amd_iommu *iommu) +{ + struct amd_iommu *curr; + struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ + int nvec = 0, i; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) { + entries[nvec].entry = curr->evt_msi_num; + entries[nvec].vector = 0; + curr->int_enabled = true; + nvec++; + } + } + + if (pci_enable_msix(iommu->dev, entries, nvec)) { + pci_disable_msix(iommu->dev); + return 1; + } + + for (i = 0; i < nvec; ++i) { + int r = request_irq(entries->vector, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + if (r) + goto out_free; + } + + return 0; + +out_free: + for (i -= 1; i >= 0; --i) + free_irq(entries->vector, NULL); + + pci_disable_msix(iommu->dev); + + return 1; +} + +static int __init iommu_setup_msi(struct amd_iommu *iommu) +{ + int r; + struct amd_iommu *curr; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) + curr->int_enabled = true; + } + + + if (pci_enable_msi(iommu->dev)) + return 1; + + r = request_irq(iommu->dev->irq, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + + if (r) { + pci_disable_msi(iommu->dev); + return 1; + } + + return 0; +} + +static int __init iommu_init_msi(struct amd_iommu *iommu) +{ + if (iommu->int_enabled) + return 0; + + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) + return iommu_setup_msix(iommu); + else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + return iommu_setup_msi(iommu); + + return 1; +} + /**************************************************************************** * * The next functions belong to the third pass of parsing the ACPI @@ -862,6 +958,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); + iommu_init_msi(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From 90008ee4b811c944455752dcb72b291a5ba81b53 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:41:05 +0200 Subject: AMD IOMMU: add event handling code This patch adds code for polling and printing out events generated by the AMD IOMMU. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 87 +++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/amd_iommu_init.c | 1 - 2 files changed, 86 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0e494b9d5f20..0cb8fd2359f5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,9 +55,94 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * ****************************************************************************/ +static void iommu_print_event(void *__evt) +{ + u32 *event = __evt; + int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + + printk(KERN_ERR "AMD IOMMU: Event logged ["); + + switch (type) { + case EVENT_TYPE_ILL_DEV: + printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_IO_FAULT: + printk("IO_PAGE_FAULT device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_DEV_TAB_ERR: + printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_PAGE_TAB_ERR: + printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_ILL_CMD: + printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + break; + case EVENT_TYPE_CMD_HARD_ERR: + printk("COMMAND_HARDWARE_ERROR address=0x%016llx " + "flags=0x%04x]\n", address, flags); + break; + case EVENT_TYPE_IOTLB_INV_TO: + printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " + "address=0x%016llx]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address); + break; + case EVENT_TYPE_INV_DEV_REQ: + printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + default: + printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + } +} + +static void iommu_poll_events(struct amd_iommu *iommu) +{ + u32 head, tail; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + while (head != tail) { + iommu_print_event(iommu->evt_buf + head); + head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + } + + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + irqreturn_t amd_iommu_int_handler(int irq, void *data) { - return IRQ_NONE; + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) + iommu_poll_events(iommu); + + return IRQ_HANDLED; } /**************************************************************************** diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 14a06464a694..eed488892c01 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -32,7 +32,6 @@ /* * definitions for the ACPI scanning code */ -#define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 #define ACPI_IVHD_TYPE 0x10 -- cgit v1.2.3 From 126c52be4b1d2eb667a1d140f0ceaff9d353f700 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:47:35 +0200 Subject: AMD IOMMU: enable event logging The code to log IOMMU events is in place now. So enable event logging with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index eed488892c01..1974b73fece6 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -253,6 +253,13 @@ void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +/* Function to enable IOMMU event logging and event interrupts */ +void __init iommu_enable_event_logging(struct amd_iommu *iommu) +{ + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); +} + /* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. @@ -958,6 +965,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); + iommu_enable_event_logging(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From a22131a223147016041b5e5cd0ae5ab61ef4177e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 17:55:28 +0200 Subject: AMD IOMMU: allow IO page faults from devices There is a bit in the device entry to suppress all IO page faults generated by a device. This bit was set until now because there was no event logging. Now that there is event logging this patch allows IO page faults from devices to see them in the kernel log. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1974b73fece6..8c1375985557 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -950,7 +950,6 @@ static void init_device_table(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } -- cgit v1.2.3 From b39ba6ad004a31bf2a08ba2b08c1e0f9b3530bb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 18:40:46 +0200 Subject: AMD IOMMU: add dma_supported callback This function determines if the AMD IOMMU implementation is responsible for a given device. So the DMA layer can get this information from the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0cb8fd2359f5..a6a6f8ed1cf5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1204,6 +1204,30 @@ free_mem: free_pages((unsigned long)virt_addr, get_order(size)); } +/* + * This function is called by the DMA layer to find out if we can handle a + * particular device. It is part of the dma_ops. + */ +static int amd_iommu_dma_supported(struct device *dev, u64 mask) +{ + u16 bdf; + struct pci_dev *pcidev; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return 0; + + pcidev = to_pci_dev(dev); + + bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + /* Out of our scope? */ + if (bdf > amd_iommu_last_bdf) + return 0; + + return 1; +} + /* * The function for pre-allocating protection domains. * @@ -1247,6 +1271,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .unmap_single = unmap_single, .map_sg = map_sg, .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, }; /* -- cgit v1.2.3 From bd60b735c658e6e8c656e89771d281bcfcf51279 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:24:48 +0200 Subject: AMD IOMMU: don't assign preallocated protection domains to devices In isolation mode the protection domains for the devices are preallocated and preassigned. This is bad if a device should be passed to a virtualization guest because the IOMMU code does not know if it is in use by a driver. This patch changes the code to assign the device to the preallocated domain only if there are dma mapping requests for it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a6a6f8ed1cf5..7c1791447451 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -33,6 +33,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); +/* A list of preallocated protection domains */ +static LIST_HEAD(iommu_pd_list); +static DEFINE_SPINLOCK(iommu_pd_list_lock); + /* * general struct to manage commands send to an IOMMU */ @@ -663,6 +667,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->next_bit = 0; dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && @@ -768,6 +773,33 @@ static bool check_device(struct device *dev) return true; } +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid) { + ret = entry; + list_del(&ret->list); + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -803,9 +835,11 @@ static int get_device_resources(struct device *dev, *iommu = amd_iommu_rlookup_table[*bdf]; if (*iommu == NULL) return 0; - dma_dom = (*iommu)->default_dom; *domain = domain_for_device(*bdf); if (*domain == NULL) { + dma_dom = find_protection_domain(*bdf); + if (!dma_dom) + dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; set_device_domain(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " @@ -1257,10 +1291,9 @@ void prealloc_protection_domains(void) if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); - set_device_domain(iommu, &dma_dom->domain, devid); - printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", - dma_dom->domain.id); - print_devid(devid, 1); + dma_dom->target_dev = devid; + + list_add_tail(&dma_dom->list, &iommu_pd_list); } } -- cgit v1.2.3 From 38ddf41b198e21d3ecbe5752e875857b7ce7589e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:38:32 +0200 Subject: AMD IOMMU: some set_device_domain cleanups Remove some magic numbers and split the pte_root using standard functions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7c1791447451..a34d8e915e3a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -739,12 +739,13 @@ static void set_device_domain(struct amd_iommu *iommu, u64 pte_root = virt_to_phys(domain->pt_root); - pte_root |= (domain->mode & 0x07) << 9; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = pte_root; - amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_pd_table[devid] = domain; -- cgit v1.2.3 From 13d9fead3daa0efa1b8bb6ae59650e4453b39128 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 20:19:40 +0900 Subject: AMD IOMMU: avoid unnecessary low zone allocation in alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but AMD IOMMU doesn't need it for devices that the IOMMU can do virtual mappings for. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a34d8e915e3a..e4866660463e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1173,6 +1173,9 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!check_device(dev)) return NULL; + if (!get_device_resources(dev, &iommu, &domain, &devid)) + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1180,8 +1183,6 @@ static void *alloc_coherent(struct device *dev, size_t size, memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); - get_device_resources(dev, &iommu, &domain, &devid); - if (!iommu || !domain) { *dma_addr = (dma_addr_t)paddr; return virt_addr; -- cgit v1.2.3 From c97ac5359e6897abe22770740294dda185bac30d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:59:15 +0200 Subject: AMD IOMMU: replace memset with __GFP_ZERO in alloc_coherent Remove the memset and use __GFP_ZERO at allocation time instead. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4866660463e..f405a61f61fc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1176,11 +1176,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!get_device_resources(dev, &iommu, &domain, &devid)) flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; - memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); if (!iommu || !domain) { -- cgit v1.2.3 From 6754086ce67c0a1f5d7eac612102368781e14588 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:17:00 +0200 Subject: AMD IOMMU: simplify dma_mask_to_pages The current calculation is very complicated. This patch replaces it with a much simpler version. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f405a61f61fc..db64482b1796 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -472,8 +472,7 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, ****************************************************************************/ static unsigned long dma_mask_to_pages(unsigned long mask) { - return (mask >> PAGE_SHIFT) + - (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); + return PAGE_ALIGN(mask) >> PAGE_SHIFT; } /* -- cgit v1.2.3 From d58befd3a0110c93d70756537b4d01d05a9e6e12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:19:58 +0200 Subject: AMD IOMMU: free domain bitmap with its allocation order The amd_iommu_pd_alloc_bitmap is allocated with a calculated order and freed with order 1. This is not a bug since the calculated order always evaluates to 1, but its unclean code. So replace the 1 with the calculation in the release path. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c1375985557..e60f4cd29eb2 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1144,7 +1144,8 @@ out: return ret; free: - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); free_pages((unsigned long)amd_iommu_pd_table, get_order(rlookup_table_size)); -- cgit v1.2.3 From 199d0d501202f077fe647a5c14fe046b17abc46b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 16:45:59 +0200 Subject: AMD IOMMU: remove unnecessary cast to u64 in the init code The ctrl variable is only u32 and readl also returns a 32 bit value. So the cast to u64 is pointless. Remove it with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index e60f4cd29eb2..505fc04e8969 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -235,7 +235,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; - ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); ctrl &= ~(1 << bit); writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -- cgit v1.2.3 From b514e55569855bbaab782a8ec073630ed4e99c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:14:27 +0200 Subject: AMD IOMMU: calculate IVHD size with a function The current calculation of the IVHD entry size is hard to read. So move this code to a seperate function to make it more clear what this calculation does. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 505fc04e8969..80250e63bd07 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -296,6 +296,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) * ****************************************************************************/ +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + return 0x04 << (*ivhd >> 6); +} + /* * This function reads the last device id the IOMMU has to handle from the PCI * capability header for this IOMMU @@ -340,7 +348,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) default: break; } - p += 0x04 << (*p >> 6); + p += ivhd_entry_length(p); } WARN_ON(p != end); @@ -641,7 +649,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - p += 0x04 << (e->type >> 6); + p += ivhd_entry_length(p); } } -- cgit v1.2.3 From 23c1713fe9e6ac886a4d44415298d0cbb2e83df2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:18:17 +0200 Subject: AMD IOMMU: use cmd_buf_size when freeing the command buffer The command buffer release function uses the CMD_BUF_SIZE macro for get_order. Replace this with iommu->cmd_buf_size which is more reliable about the actual size of the buffer. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 80250e63bd07..db0c83af44de 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -433,7 +433,8 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + free_pages((unsigned long)iommu->cmd_buf, + get_order(iommu->cmd_buf_size)); } /* allocates the memory where the IOMMU will log its events to */ -- cgit v1.2.3 From 832a90c30485117d65180cc9a8d9869c1b158570 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:54:23 +0200 Subject: AMD IOMMU: use coherent_dma_mask in alloc_coherent The alloc_coherent implementation for AMD IOMMU currently uses *dev->dma_mask per default. This patch changes it to prefer dev->coherent_dma_mask if it is set. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index db64482b1796..6f7b97445738 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -483,9 +483,10 @@ static unsigned long dma_mask_to_pages(unsigned long mask) static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, - unsigned long align_mask) + unsigned long align_mask, + u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long limit = dma_mask_to_pages(dma_mask); unsigned long address; unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; @@ -919,7 +920,8 @@ static dma_addr_t __map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, - bool align) + bool align, + u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; @@ -933,7 +935,8 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, + dma_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -997,10 +1000,13 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, struct protection_domain *domain; u16 devid; dma_addr_t addr; + u64 dma_mask; if (!check_device(dev)) return bad_dma_address; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -1008,7 +1014,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + dma_mask); if (addr == bad_dma_address) goto out; @@ -1080,10 +1087,13 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; phys_addr_t paddr; int mapped_elems = 0; + u64 dma_mask; if (!check_device(dev)) return 0; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -1095,7 +1105,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir, false); + paddr, s->length, dir, false, + dma_mask); if (s->dma_address) { s->dma_length = s->length; @@ -1168,6 +1179,7 @@ static void *alloc_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; phys_addr_t paddr; + u64 dma_mask = dev->coherent_dma_mask; if (!check_device(dev)) return NULL; @@ -1187,10 +1199,13 @@ static void *alloc_coherent(struct device *dev, size_t size, return virt_addr; } + if (!dma_mask) + dma_mask = *dev->dma_mask; + spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true); + size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From 5871c6b0a52bb052c3891a787655043b90ae18e3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 20 Sep 2008 20:35:44 -0700 Subject: x86: use round_jiffies() for the corruption check timer the exact timing of the corruption check isn't too important (it's once a minute timer), use round_jiffies() to align it and avoid extra wakeups. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 786c1886ae53..161f1b33ecec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -712,7 +712,7 @@ void check_for_bios_corruption(void) static void periodic_check_for_corruption(unsigned long data) { check_for_bios_corruption(); - mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ); + mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); } void start_periodic_check_for_corruption(void) -- cgit v1.2.3 From 2216d199b1430d1c0affb1498a9ebdbd9c0de439 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 22 Sep 2008 02:52:26 -0700 Subject: x86: fix CONFIG_X86_RESERVE_LOW_64K=y The bad_bios_dmi_table() quirk never triggered because we do DMI setup too late. Move it a bit earlier. Also change the CONFIG_X86_RESERVE_LOW_64K quirk to operate on the e820 table directly instead of messing with early reservations - this handles overlaps (which do occur in this low range of RAM) more gracefully. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 161f1b33ecec..d29951c11be0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -735,7 +735,8 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); + e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return 0; } @@ -784,8 +785,6 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif - dmi_check_system(bad_bios_dmi_table); - early_cpu_init(); early_ioremap_init(); @@ -880,6 +879,10 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + dmi_scan_machine(); + + dmi_check_system(bad_bios_dmi_table); + #ifdef CONFIG_X86_32 probe_roms(); #endif @@ -967,8 +970,6 @@ void __init setup_arch(char **cmdline_p) vsmp_init(); #endif - dmi_scan_machine(); - io_delay_init(); /* -- cgit v1.2.3 From af2d237bf574f89ae5a1b67f2556a324c8f64ff5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:27:13 +0900 Subject: x86: check for ioremap() failure in copy_oldmem_page() Add a check for ioremap() failure in copy_oldmem_page(). This patch also includes small coding style fixes. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a46..280d6ef3af02 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -33,14 +33,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!vaddr) + return -ENOMEM; if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user(buf, vaddr + offset, csize)) { iounmap(vaddr); return -EFAULT; } } else - memcpy(buf, (vaddr + offset), csize); + memcpy(buf, vaddr + offset, csize); iounmap(vaddr); return csize; -- cgit v1.2.3 From 153dab77e228931f3aff74f21b762927ac710ca7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:25:40 +0900 Subject: x86: use platform_device_register_simple() Cleanup pcspeaker.c Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/pcspeaker.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea277..a311ffcaad16 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -1,20 +1,13 @@ #include -#include +#include #include static __init int add_pcspkr(void) { struct platform_device *pd; - int ret; - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; + pd = platform_device_register_simple("pcspkr", -1, NULL, 0); - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(pd) ? PTR_ERR(pd) : 0; } device_initcall(add_pcspkr); -- cgit v1.2.3 From 16dc552f35bc0ec6fec8ef83f8032eee352d17f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 11:45:04 -0700 Subject: x86: use WARN_ONCE in workaround for mtrr mask so could help catch attention about bug in bios about mtrr mask setting. WARN_ONCE got into mainline already, lets use it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index cb7d3b6a80eb..4e8d77f01eeb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - static int once = 1; - - if (once) { - printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); - once = 0; - } + WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); mask_lo = tmp; } } -- cgit v1.2.3 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-gart_64.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6f7b97445738..70537d117a96 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -572,7 +572,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, if (start_page + pages > last_page) pages = last_page - start_page; - set_bit_string(dom->bitmap, start_page, pages); + iommu_area_reserve(dom->bitmap, start_page, pages); } static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fe7695e4caae..080d1d27f37a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, badbit, tbl, start_addr, npages); } - set_bit_string(tbl->it_map, index, npages); + iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 508ef470b27f..3dcb1ad86e38 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -827,7 +827,7 @@ void __init gart_iommu_init(void) * Out of IOMMU space handling. * Reserve some invalid pages at the beginning of the GART. */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); agp_memory_reserved = iommu_size; printk(KERN_INFO -- cgit v1.2.3 From 28b166a700899a0f88b1cc283c449fb5bf72a635 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:14:13 -0400 Subject: x86, NMI watchdog: when booting with reset_devices, clear the performance counters P4s have a quirk that makes necessary to clear P4_CCCR_OVF bit on the CCCR everytime the PMI is triggered. When booting the kernel with reset_devices (more specific kdump case), the counters reach zero and the PMI will be generated. This is not a problem on other processors but on P4s, it'll continue to generate NMIs until that bit is cleared. Since there may be other users of the performance counters, clear and disable all of them when booting with reset_devices option. We have a P4 box here that crashes because of this problem. Since the kdump kernel usually boots with only one processor active, the second logical unit won't be set up, therefore, MSR_P4_IQ_CCCR1 (and other performance counter registers) won't be cleared and P4_CCCR_OVF may be still set because the previous kernel was using this register. An NMI is triggered because of the MSR_P4_IQ_CCCR1 right after the NMI delivery is enabled, triggering the race fixed on my previous email. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 05cc22dbd4ff..62c010063974 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -432,6 +432,27 @@ static const struct wd_ops p6_wd_ops = { #define P4_CCCR_ENABLE (1 << 12) #define P4_CCCR_OVF (1 << 31) +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; /* * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter * CRU_ESCR0 (with any non-null event selector) through a complemented @@ -473,6 +494,26 @@ static int setup_p4_watchdog(unsigned nmi_hz) evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR0; cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } } else { /* logical cpu 1 */ perfctr_msr = MSR_P4_IQ_PERFCTR1; -- cgit v1.2.3 From b3e15bdef689641e7f1bb03efbe56112c3ee82e2 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:13:59 -0400 Subject: x86, NMI watchdog: setup before enabling NMI watchdog There's a small window when NMI watchdog is being set up that if any NMIs are triggered, the NMI code will make make use of not initalized wd_ops elements: void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: /* enable it before to avoid race with handler */ --> __get_cpu_var(wd_enabled) = 1; --> if (lapic_watchdog_init(nmi_hz) < 0) { (...) asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { (...) if (nmi_watchdog_tick(regs, reason)) return; (...) notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { (...) if (!__get_cpu_var(wd_enabled)) return rc; switch (nmi_watchdog) { case NMI_LOCAL_APIC: rc |= lapic_wd_event(nmi_hz); (...) int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; --> rdmsrl(wd->perfctr_msr, ctr); and wd->*_msr will be initialized on each processor type specific setup, after enabling NMIs for PMIs. Since the counter was just set, the chances of an performance counter generated NMI is minimal, but any other unknown NMI would trigger the problem. This patch fixes the problem by setting everything up before enabling performance counter generated NMIs and will set wd_enabled using a callback function. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 45 +++++++++++++++++++++++++--------- arch/x86/kernel/nmi.c | 11 +++++++-- 2 files changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 62c010063974..6bff382094f5 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -540,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); wrmsr(cccr_msr, cccr_val, 0); write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); return 1; } @@ -661,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2cc4ad..2c97f07f1c2c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -299,6 +299,15 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 1); } +/* + * This function is called as soon the LAPIC NMI watchdog driver has everything + * in place and it's ready to check if the NMIs belong to the NMI watchdog + */ +void cpu_nmi_set_wd_enabled(void) +{ + __get_cpu_var(wd_enabled) = 1; +} + void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) @@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) switch (nmi_watchdog) { case NMI_LOCAL_APIC: - /* enable it before to avoid race with handler */ - __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; return; -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/amd_iommu_init.c | 5 ++++- arch/x86/kernel/pci-dma.c | 13 ------------- arch/x86/kernel/pci-gart_64.c | 13 +++++++++++++ 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 70537d117a96..c19212191c98 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -948,7 +948,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) @@ -985,7 +985,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db0c83af44de..148fcfe22f17 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ int amd_iommu_isolate; /* if 1, device isolation is enabled */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -1144,7 +1145,7 @@ int __init amd_iommu_init(void) else printk("disabled\n"); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); @@ -1214,6 +1215,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "fullflush", 11) == 0) + amd_iommu_unmap_flush = true; } return 1; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d2f2c0158dc1..0a1408abcc62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,15 +16,6 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -180,10 +171,6 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 3dcb1ad86e38..9e390f1bd46a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,6 +45,15 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush = 1; + /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -892,6 +901,10 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From a8b71a2810386a5ac8f43d2095fe3355f0d8db37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 23 Sep 2008 00:35:33 -0700 Subject: x86: fix macro with bad_bios_dmi_table DMI tables need a blank NULL tail. fixes the crash on Ingo's test box. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d29951c11be0..3ce3edfcc3cd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -758,8 +758,8 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), }, }, - {} #endif + {} }; /* -- cgit v1.2.3 From 05e12e1c4c09cd35ac9f4e6af1e42b0036375d72 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 22 Sep 2008 22:58:47 -0700 Subject: x86: fix 27-rc crash on vsmp due to paravirt during module load 27-rc fails to boot up if configured to use modules. Turns out vsmp_patch was marked __init, and vsmp_patch being the pvops 'patch' routine for vsmp, a call to vsmp_patch just turns out to execute a code page with series of 0xcc (POISON_FREE_INITMEM -- int3). vsmp_patch has been marked with __init ever since pvops, however, apply_paravirt can be called during module load causing calls to freed memory location. Since apply_paravirt can only be called during init/module load, make vsmp_patch with "__init_or_module" Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c7..7766d36983fc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } -static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, +static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { switch (type) { -- cgit v1.2.3 From 4faac97d44ac27bdbb010a9c3597401a8f89341f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:54:29 +0200 Subject: x86: prevent stale state of c1e_mask across CPU offline/online Impact: hang which happens across CPU offline/online on AMD C1E systems. When a CPU goes offline then the corresponding bit in the broadcast mask is cleared. For AMD C1E enabled CPUs we do not reenable the broadcast when the CPU comes online again as we do not clear the corresponding bit in the c1e_mask, which keeps track which CPUs have been switched to broadcast already. So on those !$@#& machines we never switch back to broadcasting after a CPU offline/online cycle. Clear the bit when the CPU plays dead. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 11 ++++++++--- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..2e2247117f6e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) return 1; } +static cpumask_t c1e_mask = CPU_MASK_NONE; +static int c1e_detected; + +void c1e_remove_cpu(int cpu) +{ + cpu_clear(cpu, c1e_mask); +} + /* * C1E aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same @@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) */ static void c1e_idle(void) { - static cpumask_t c1e_mask = CPU_MASK_NONE; - static int c1e_detected; - if (need_resched()) return; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3b7a1ddcc0bc..4b3cfdf54216 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -88,6 +88,7 @@ static void cpu_exit_clear(void) cpu_clear(cpu, cpu_callin_map); numa_remove_cpu(cpu); + c1e_remove_cpu(cpu); } /* We don't actually take CPU down, just spin without interrupts. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71553b664e2a..e12e0e4dd256 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,6 +93,8 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); + c1e_remove_cpu(raw_smp_processor_id()); + mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; -- cgit v1.2.3 From a8d6829044901a67732904be5f1eacdf8539604f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 19:02:25 +0200 Subject: x86: prevent C-states hang on AMD C1E enabled machines Impact: System hang when AMD C1E machines switch into C2/C3 AMD C1E enabled systems do not work with normal ACPI C-states even if the BIOS is advertising them. Limit the C-states to C1 for the ACPI processor idle code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2e2247117f6e..d8c2a299bfe5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -272,6 +272,7 @@ static void c1e_idle(void) c1e_detected = 1; mark_tsc_unstable("TSC halt in C1E"); printk(KERN_INFO "System has C1E enabled\n"); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 09bfeea13cea843fb03eaa96b5d891fa0abdcc90 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 18 Sep 2008 21:12:10 +0200 Subject: x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC Impact: Functional TSC is marked unstable on AMD family 0x10 and 0x11 CPUs. This would be wrong because for those CPUs "invariant TSC" means: "The TSC counts at the same rate in all P-states, all C states, S0, or S1" (See "Processor BIOS and Kernel Developer's Guides" for those CPUs.) [ tglx: Changed C1E to AMD C1E in the printks to avoid confusion with Intel C1E ] Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d8c2a299bfe5..876e91890777 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -270,8 +270,9 @@ static void c1e_idle(void) rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { c1e_detected = 1; - mark_tsc_unstable("TSC halt in C1E"); - printk(KERN_INFO "System has C1E enabled\n"); + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + printk(KERN_INFO "System has AMD C1E enabled\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 18dbc9160507dc7df998e00cd1dcd7889557307b Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Tue, 23 Sep 2008 12:08:44 +0200 Subject: x86: moved microcode.c to microcode_intel.c Combine both generic and arch-specific parts of microcode into a single module (arch-specific parts are config-dependent). Also while we are at it, move arch-specific parts from microcode.h into their respective arch-specific .c files. Signed-off-by: Dmitry Adamushko Cc: "Peter Oruba" Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 8 +- arch/x86/kernel/microcode.c | 505 ------------------------------------- arch/x86/kernel/microcode_amd.c | 72 +++--- arch/x86/kernel/microcode_core.c | 509 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/microcode_intel.c | 79 +++--- 5 files changed, 606 insertions(+), 567 deletions(-) delete mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/microcode_core.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index be454f348c3b..f891996f6849 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,9 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o -obj-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o @@ -101,6 +98,11 @@ scx200-y += scx200_32.o obj-$(CONFIG_OLPC) += olpc.o +microcode-y := microcode_core.o +microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o +microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o +obj-$(CONFIG_MICROCODE) += microcode.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 0c2634f4fd7c..000000000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/253668.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "2.00" - -struct microcode_ops *microcode_ops; - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -EXPORT_SYMBOL_GPL(ucode_cpu_info); - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static int do_microcode_update(const void __user *buf, size_t size) -{ - cpumask_t old; - int error = 0; - int cpu; - - old = current->cpus_allowed; - - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->request_microcode_user(cpu, buf, size); - if (error < 0) - goto out; - if (!error) - microcode_ops->apply_microcode(cpu); - } -out: - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open(struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", - num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - ret = do_microcode_update(buf, len); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init(void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit(void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while (0) -#endif - -/* fake device for request_firmware */ -struct platform_device *microcode_pdev; - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old = current->cpus_allowed; - - get_online_cpus(); - if (cpu_online(cpu)) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - if (uci->valid) { - err = microcode_ops->request_microcode_fw(cpu, - µcode_pdev->dev); - if (!err) - microcode_ops->apply_microcode(cpu); - } - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); - } - put_online_cpus(); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - microcode_ops->microcode_fini_cpu(cpu); - uci->valid = 0; - mutex_unlock(µcode_mutex); -} - -static void collect_cpu_info(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - memset(uci, 0, sizeof(*uci)); - if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) - uci->valid = 1; -} - -static int microcode_resume_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct cpu_signature nsig; - - pr_debug("microcode: CPU%d resumed\n", cpu); - - if (!uci->mc.valid_mc) - return 1; - - /* - * Let's verify that the 'cached' ucode does belong - * to this cpu (a bit of paranoia): - */ - if (microcode_ops->collect_cpu_info(cpu, &nsig)) { - microcode_fini_cpu(cpu); - return -1; - } - - if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { - microcode_fini_cpu(cpu); - /* Should we look for a new ucode here? */ - return 1; - } - - return 0; -} - -void microcode_update_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int err = 0; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - - mutex_lock(µcode_mutex); - /* - * Check if the system resume is in progress (uci->valid != NULL), - * otherwise just request a firmware: - */ - if (uci->valid) { - err = microcode_resume_cpu(cpu); - } else { - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING) - err = microcode_ops->request_microcode_fw(cpu, - µcode_pdev->dev); - } - - if (!err) - microcode_ops->apply_microcode(cpu); - - mutex_unlock(µcode_mutex); -} - -static void microcode_init_cpu(int cpu) -{ - cpumask_t old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - microcode_update_cpu(cpu); - set_cpus_allowed_ptr(current, &old); -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu); - return 0; -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - /* only CPU 0 will apply ucode here */ - microcode_update_cpu(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - microcode_init_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - pr_debug("microcode: CPU%d added\n", cpu); - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - pr_debug("microcode: CPU%d removed\n", cpu); - break; - case CPU_DEAD: - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -int microcode_init(void *opaque, struct module *module) -{ - struct microcode_ops *ops = (struct microcode_ops *)opaque; - int error; - - if (microcode_ops) { - printk(KERN_ERR "microcode: already loaded the other module\n"); - return -EEXIST; - } - - microcode_ops = ops; - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION - " " - " \n"); - - return 0; -} -EXPORT_SYMBOL_GPL(microcode_init); - -void __exit microcode_exit(void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); - - microcode_ops = NULL; - - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -} -EXPORT_SYMBOL_GPL(microcode_exit); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 48aec9f48e4f..03ea4e52e87a 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -46,6 +46,35 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 +struct equiv_cpu_entry { + unsigned int installed_cpu; + unsigned int fixed_errata_mask; + unsigned int fixed_errata_compare; + unsigned int equiv_cpu; +}; + +struct microcode_header_amd { + unsigned int data_code; + unsigned int patch_id; + unsigned char mc_patch_data_id[2]; + unsigned char mc_patch_data_len; + unsigned char init_flag; + unsigned int mc_patch_data_checksum; + unsigned int nb_dev_id; + unsigned int sb_dev_id; + unsigned char processor_rev_id[2]; + unsigned char nb_rev_id; + unsigned char sb_rev_id; + unsigned char bios_api_rev; + unsigned char reserved1[3]; + unsigned int match_reg[8]; +}; + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[0]; +}; + #define UCODE_MAX_SIZE (2048) #define DEFAULT_UCODE_DATASIZE (896) #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) @@ -189,17 +218,18 @@ static void apply_microcode_amd(int cpu) unsigned int rev; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + struct microcode_amd *mc_amd = uci->mc; unsigned long addr; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc.mc_amd == NULL) + if (mc_amd == NULL) return; spin_lock_irqsave(µcode_update_lock, flags); - addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code; + addr = (unsigned long)&mc_amd->hdr.data_code; edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); @@ -214,16 +244,16 @@ static void apply_microcode_amd(int cpu) spin_unlock_irqrestore(µcode_update_lock, flags); /* check current patch id and patch's id for match */ - if (rev != uci->mc.mc_amd->hdr.patch_id) { + if (rev != mc_amd->hdr.patch_id) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, - uci->mc.mc_amd->hdr.patch_id, rev); + mc_amd->hdr.patch_id, rev); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x \n", - cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id); + cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); uci->cpu_sig.rev = rev; } @@ -355,12 +385,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size, if (new_mc) { if (!leftover) { - if (uci->mc.mc_amd) - vfree(uci->mc.mc_amd); - uci->mc.mc_amd = (struct microcode_amd *)new_mc; + if (uci->mc) + vfree(uci->mc); + uci->mc = new_mc; pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev); + cpu, new_rev, uci->cpu_sig.rev); } else vfree(new_mc); } @@ -416,8 +446,8 @@ static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - vfree(uci->mc.mc_amd); - uci->mc.mc_amd = NULL; + vfree(uci->mc); + uci->mc = NULL; } static struct microcode_ops microcode_amd_ops = { @@ -428,23 +458,7 @@ static struct microcode_ops microcode_amd_ops = { .microcode_fini_cpu = microcode_fini_cpu_amd, }; -static int __init microcode_amd_module_init(void) +struct microcode_ops * __init init_amd_microcode(void) { - struct cpuinfo_x86 *c = &cpu_data(0); - - equiv_cpu_table = NULL; - if (c->x86_vendor != X86_VENDOR_AMD) { - printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n"); - return -ENODEV; - } - - return microcode_init(µcode_amd_ops, THIS_MODULE); -} - -static void __exit microcode_amd_module_exit(void) -{ - microcode_exit(); + return µcode_amd_ops; } - -module_init(microcode_amd_module_init) -module_exit(microcode_amd_module_exit) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c new file mode 100644 index 000000000000..ff031dbccdf6 --- /dev/null +++ b/arch/x86/kernel/microcode_core.c @@ -0,0 +1,509 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to + * speculative nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "2.00" + +struct microcode_ops *microcode_ops; + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static int do_microcode_update(const void __user *buf, size_t size) +{ + cpumask_t old; + int error = 0; + int cpu; + + old = current->cpus_allowed; + + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); + if (error < 0) + goto out; + if (!error) + microcode_ops->apply_microcode(cpu); + } +out: + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open(struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", + num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + ret = do_microcode_update(buf, len); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init(void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit(void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while (0) +#endif + +/* fake device for request_firmware */ +struct platform_device *microcode_pdev; + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old = current->cpus_allowed; + + get_online_cpus(); + if (cpu_online(cpu)) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + mutex_lock(µcode_mutex); + if (uci->valid) { + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + if (!err) + microcode_ops->apply_microcode(cpu); + } + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); + } + put_online_cpus(); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + microcode_ops->microcode_fini_cpu(cpu); + uci->valid = 0; + mutex_unlock(µcode_mutex); +} + +static void collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + memset(uci, 0, sizeof(*uci)); + if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) + uci->valid = 1; +} + +static int microcode_resume_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpu_signature nsig; + + pr_debug("microcode: CPU%d resumed\n", cpu); + + if (!uci->mc) + return 1; + + /* + * Let's verify that the 'cached' ucode does belong + * to this cpu (a bit of paranoia): + */ + if (microcode_ops->collect_cpu_info(cpu, &nsig)) { + microcode_fini_cpu(cpu); + return -1; + } + + if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { + microcode_fini_cpu(cpu); + /* Should we look for a new ucode here? */ + return 1; + } + + return 0; +} + +void microcode_update_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int err = 0; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); + /* + * Check if the system resume is in progress (uci->valid != NULL), + * otherwise just request a firmware: + */ + if (uci->valid) { + err = microcode_resume_cpu(cpu); + } else { + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING) + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + } + + if (!err) + microcode_ops->apply_microcode(cpu); + + mutex_unlock(µcode_mutex); +} + +static void microcode_init_cpu(int cpu) +{ + cpumask_t old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + microcode_update_cpu(cpu); + set_cpus_allowed_ptr(current, &old); +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu); + return 0; +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + /* only CPU 0 will apply ucode here */ + microcode_update_cpu(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + microcode_init_cpu(cpu); + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + pr_debug("microcode: CPU%d added\n", cpu); + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + pr_debug("microcode: CPU%d removed\n", cpu); + break; + case CPU_DEAD: + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + int error; + + if (c->x86_vendor == X86_VENDOR_INTEL) + microcode_ops = init_intel_microcode(); + else if (c->x86_vendor != X86_VENDOR_AMD) + microcode_ops = init_amd_microcode(); + + if (!microcode_ops) { + printk(KERN_ERR "microcode: no support for this CPU vendor\n"); + return -ENODEV; + } + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION + " " + " \n"); + + return 0; +} + +static void __exit microcode_exit(void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); + + microcode_ops = NULL; + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} + +module_init(microcode_init); +module_exit(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 48ed3cef58c1..622dc4a21784 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -97,6 +97,38 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[0]; +}; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; + #define DEFAULT_UCODE_DATASIZE (2000) #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) @@ -284,11 +316,12 @@ static void apply_microcode(int cpu) unsigned int val[2]; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_intel *mc_intel = uci->mc; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc.mc_intel == NULL) + if (mc_intel == NULL) return; /* serialize access to the physical write to MSR 0x79 */ @@ -296,8 +329,8 @@ static void apply_microcode(int cpu) /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc.mc_intel->bits, - (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16); + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ @@ -307,7 +340,7 @@ static void apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc.mc_intel->hdr.rev) { + if (val[1] != mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); return; @@ -315,9 +348,9 @@ static void apply_microcode(int cpu) printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %04x-%02x-%02x \n", cpu_num, uci->cpu_sig.rev, val[1], - uci->mc.mc_intel->hdr.date & 0xffff, - uci->mc.mc_intel->hdr.date >> 24, - (uci->mc.mc_intel->hdr.date >> 16) & 0xff); + mc_intel->hdr.date & 0xffff, + mc_intel->hdr.date >> 24, + (mc_intel->hdr.date >> 16) & 0xff); uci->cpu_sig.rev = val[1]; } @@ -367,12 +400,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size, if (new_mc) { if (!leftover) { - if (uci->mc.mc_intel) - vfree(uci->mc.mc_intel); - uci->mc.mc_intel = (struct microcode_intel *)new_mc; + if (uci->mc) + vfree(uci->mc); + uci->mc = (struct microcode_intel *)new_mc; pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev); + cpu, new_rev, uci->cpu_sig.rev); } else vfree(new_mc); } @@ -428,11 +461,11 @@ static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - vfree(uci->mc.mc_intel); - uci->mc.mc_intel = NULL; + vfree(uci->mc); + uci->mc = NULL; } -static struct microcode_ops microcode_intel_ops = { +struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, @@ -440,22 +473,8 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; -static int __init microcode_intel_module_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_INTEL) { - printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n"); - return -ENODEV; - } - - return microcode_init(µcode_intel_ops, THIS_MODULE); -} - -static void __exit microcode_intel_module_exit(void) +struct microcode_ops * __init init_intel_microcode(void) { - microcode_exit(); + return µcode_intel_ops; } -module_init(microcode_intel_module_init) -module_exit(microcode_intel_module_exit) -- cgit v1.2.3 From da654b74bda14c45a7d98c731bf3c1a43b6b74e2 Mon Sep 17 00:00:00 2001 From: Srinivasa Ds Date: Tue, 23 Sep 2008 15:23:52 +0530 Subject: signals: demultiplexing SIGTRAP signal Currently a SIGTRAP can denote any one of below reasons. - Breakpoint hit - H/W debug register hit - Single step - Signal sent through kill() or rasie() Architectures like powerpc/parisc provides infrastructure to demultiplex SIGTRAP signal by passing down the information for receiving SIGTRAP through si_code of siginfot_t structure. Here is an attempt is generalise this infrastructure by extending it to x86 and x86_64 archs. Signed-off-by: Srinivasa DS Cc: Roland McGrath Cc: akpm@linux-foundation.org Cc: paulus@samba.org Cc: linuxppc-dev@ozlabs.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 7 ++++--- arch/x86/kernel/traps_32.c | 4 +++- arch/x86/kernel/traps_64.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9e43a48ad6e0..bf45cdf1aaca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1358,7 +1358,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) { struct siginfo info; @@ -1367,7 +1368,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) memset(&info, 0, sizeof(info)); info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; + info.si_code = si_code; /* User-mode ip? */ info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; @@ -1454,5 +1455,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) */ if (test_thread_flag(TIF_SINGLESTEP) && tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) - send_sigtrap(current, regs, 0); + send_sigtrap(current, regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index da5a5964fccb..0429c5de5ea9 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -891,6 +891,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned int condition; + int si_code; trace_hardirqs_fixup(); @@ -935,8 +936,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_TF_reenable; } + si_code = get_si_code((unsigned long)condition); /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code); + send_sigtrap(tsk, regs, error_code, si_code); /* * Disable additional traps. They'll be re-enabled when diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 56d6f1147785..011d8e1fac6e 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -941,7 +941,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs, tsk->thread.error_code = error_code; info.si_signo = SIGTRAP; info.si_errno = 0; - info.si_code = TRAP_BRKPT; + info.si_code = get_si_code(condition); info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; force_sig_info(SIGTRAP, &info, tsk); -- cgit v1.2.3 From b6cffde1a20409f9720d5c9aba28d3efd3a4f04e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 17 Sep 2008 15:05:52 +0200 Subject: x86, microcode rework, v2, renaming Renaming based on patch from Dmitry Adamushko. Made code more readable by renaming define and variables related to microcode _container_file_ header to make it distinguishable from microcode _patch_ header. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 03ea4e52e87a..62058e9c8b4e 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -304,12 +304,12 @@ static void * get_next_ucode(u8 *buf, unsigned int size, static int install_equiv_cpu_table(u8 *buf, int (*get_ucode_data)(void *, const void *, size_t)) { -#define UCODE_HEADER_SIZE 12 - u8 *hdr[UCODE_HEADER_SIZE]; - unsigned int *buf_pos = (unsigned int *)hdr; +#define UCODE_CONTAINER_HEADER_SIZE 12 + u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; + unsigned int *buf_pos = (unsigned int *)container_hdr; unsigned long size; - if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE)) + if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) return 0; size = buf_pos[2]; @@ -326,14 +326,14 @@ static int install_equiv_cpu_table(u8 *buf, return 0; } - buf += UCODE_HEADER_SIZE; + buf += UCODE_CONTAINER_HEADER_SIZE; if (get_ucode_data(equiv_cpu_table, buf, size)) { vfree(equiv_cpu_table); return 0; } - return size + UCODE_HEADER_SIZE; /* add header length */ -#undef UCODE_HEADER_SIZE + return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ +#undef UCODE_CONTAINER_HEADER_SIZE } static void free_equiv_cpu_table(void) -- cgit v1.2.3 From d4738792fb86600b6cb7220459d9c47e819b3580 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 17 Sep 2008 15:39:18 +0200 Subject: x86, microcode rework, v2, renaming cont. Renaming based on patch from Dmitry Adamushko. Further clarification by renaming define and variable related to microcode container file. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 62058e9c8b4e..829415208ff8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -263,21 +263,20 @@ static void * get_next_ucode(u8 *buf, unsigned int size, unsigned int *mc_size) { unsigned int total_size; -#define UCODE_UNKNOWN_HDR 8 - u8 hdr[UCODE_UNKNOWN_HDR]; +#define UCODE_CONTAINER_SECTION_HDR 8 + u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; void *mc; - if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR)) + if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) return NULL; - if (hdr[0] != UCODE_UCODE_TYPE) { + if (section_hdr[0] != UCODE_UCODE_TYPE) { printk(KERN_ERR "microcode: error! " "Wrong microcode payload type field\n"); return NULL; } - /* FIXME! dimm: Why not by means of get_totalsize(hdr)? */ - total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); + total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); printk(KERN_INFO "microcode: size %u, total_size %u\n", size, total_size); @@ -290,13 +289,13 @@ static void * get_next_ucode(u8 *buf, unsigned int size, mc = vmalloc(UCODE_MAX_SIZE); if (mc) { memset(mc, 0, UCODE_MAX_SIZE); - if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) { + if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { vfree(mc); mc = NULL; } else - *mc_size = total_size + UCODE_UNKNOWN_HDR; + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; } -#undef UCODE_UNKNOWN_HDR +#undef UCODE_CONTAINER_SECTION_HDR return mc; } -- cgit v1.2.3 From 1eda81495a49a4ee91d8863b0a441a624375efea Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 23 Sep 2008 22:40:02 -0400 Subject: x86: prevent stale state of c1e_mask across CPU offline/online, fix Fix build error introduced by commit 4faac97d44ac27 ("x86: prevent stale state of c1e_mask across CPU offline/online"). process_32.c needs to include idle.h to get the prototype for c1e_remove_cpu() Signed-off-by: Marc Dionne Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b3cfdf54216..31f40b24bf5d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -- cgit v1.2.3 From 77a9a768b7374cd23d1f400097eede9f1547f508 Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Tue, 23 Sep 2008 21:54:00 -0400 Subject: x86: disable apm on the olpc The OLPC doesn't support APM but also doesn't have DMI, so we can't detect and disable it based on DMI data. So, just disable based on machine_is_olpc() Signed-off-by: Jeremy Katz Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b0..732d1f4e10ee 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -234,6 +234,7 @@ #include #include #include +#include #include #include @@ -2217,7 +2218,7 @@ static int __init apm_init(void) dmi_check_system(apm_dmi_table); - if (apm_info.bios.version == 0 || paravirt_enabled()) { + if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { printk(KERN_INFO "apm: BIOS not found.\n"); return -ENODEV; } -- cgit v1.2.3 From 5fd933303bd1efacbd0acbe452ba9b889440eb40 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:19:44 -0700 Subject: x86: signal: cosmetic unification of do_signal() Make do_signal() same. Thia patch modifies only comments in signal_64.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf77d4789a2d..5a5fbc3b1eea 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -410,7 +410,8 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* Re-enable any watchpoints before delivering the + /* + * Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. @@ -418,7 +419,7 @@ static void do_signal(struct pt_regs *regs) if (current->thread.debugreg7) set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ + /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved @@ -441,6 +442,7 @@ static void do_signal(struct pt_regs *regs) regs->ax = regs->orig_ax; regs->ip -= 2; break; + case -ERESTART_RESTARTBLOCK: regs->ax = NR_restart_syscall; regs->ip -= 2; -- cgit v1.2.3 From ee847c54ba7fc09f85f13a5bf18f45ea6c19aa83 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:21:45 -0700 Subject: x86: signal: cosmetic unification of do_notify_resume() Make do_notify_resume() same. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 8 ++++++++ arch/x86/kernel/signal_64.c | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index da3cf3270f83..b94463f264b4 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -663,6 +663,12 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) + /* notify userspace of pending MCEs */ + if (thread_info_flags & _TIF_MCE_NOTIFY) + mce_notify_user(); +#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ + /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); @@ -672,7 +678,9 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } +#ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); +#endif /* CONFIG_X86_32 */ } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 5a5fbc3b1eea..9087752f4109 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -460,14 +460,18 @@ static void do_signal(struct pt_regs *regs) } } -void do_notify_resume(struct pt_regs *regs, void *unused, - __u32 thread_info_flags) +/* + * notification of userspace execution resumption + * - triggered by the TIF_WORK_MASK flags + */ +void +do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef CONFIG_X86_MCE +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) mce_notify_user(); -#endif /* CONFIG_X86_MCE */ +#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) @@ -477,6 +481,10 @@ void do_notify_resume(struct pt_regs *regs, void *unused, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + +#ifdef CONFIG_X86_32 + clear_thread_flag(TIF_IRET); +#endif /* CONFIG_X86_32 */ } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -- cgit v1.2.3 From 86d3237cd1a09136b4fb3a1d73d3c3fd6331cb14 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:22:32 -0700 Subject: x86: signal: cosmetic unification of handle_signal() Make handle_signal() same. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 9 +++++++++ arch/x86/kernel/signal_64.c | 2 ++ 2 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b94463f264b4..bb05917f232c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -550,6 +550,15 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; +#ifdef CONFIG_X86_64 + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); +#endif + /* * Clear the direction flag as per the ABI for function entry. */ diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 9087752f4109..963236f2c3c1 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -346,12 +346,14 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; +#ifdef CONFIG_X86_64 /* * This has nothing to do with segment registers, * despite the name. This magic affects uaccess.h * macros' behavior. Reset it to the normal setting. */ set_fs(USER_DS); +#endif /* * Clear the direction flag as per the ABI for function entry. -- cgit v1.2.3 From 493cd9122af5bd0b219974a48f0e31da0c29ff7e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 14:56:44 -0700 Subject: x86: ds.c ptrace.c integer as NULL pointer sparse fixes fix: arch/x86/kernel/ptrace.c:763:29: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:777:46: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:1115:45: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:482:26: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:487:25: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Acked-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 4 ++-- arch/x86/kernel/ptrace.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ab21c270bfa4..2b69994fd3a8 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -479,12 +479,12 @@ static int ds_release(struct task_struct *task, enum ds_qualifier qual) goto out; kfree(context->buffer[qual]); - context->buffer[qual] = 0; + context->buffer[qual] = NULL; current->mm->total_vm -= context->pages[qual]; current->mm->locked_vm -= context->pages[qual]; context->pages[qual] = 0; - context->owner[qual] = 0; + context->owner[qual] = NULL; /* * we put the context twice: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index ba19bb49bd09..5df6093ac776 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -734,7 +734,7 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - ds_ovfl_callback_t ovfl = 0; + ds_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; /* we ignore the error in case we were not tracing child */ @@ -748,7 +748,7 @@ static int ptrace_bts_config(struct task_struct *child, ovfl = ptrace_bts_ovfl; } - error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); if (error < 0) goto errout; @@ -1086,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ds_get_bts_index(child, /* pos = */ 0); + ret = ds_get_bts_index(child, /* pos = */ NULL); break; case PTRACE_BTS_GET: -- cgit v1.2.3 From e51a1ac2dfca9ad869471e88f828281db7e810c0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 15:20:09 -0700 Subject: x86, olpc: fix endian bug in openfirmware workaround Boardrev is always treated as a u32 everywhere else, no reason to byteswap the 0xc2 value. The only use is to print out if it is a prerelease board, the test being: (olpc_platform_info.boardrev & 0xf) < 8 Which is currently always true as be32_to_cpu(0xc2) & 0xf = 0 but I doubt that was the intention here. The consequences of the bug are pretty minor though (incorrect boardrev displayed in dmesg when ofw support not configured) Also annotate the temporary used to read the boardrev in the ofw case. The confusion was noticed by Sparse: arch/x86/kernel/olpc.c:206:32: warning: cast to restricted __be32 Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e6672274807..7a13fac63a1f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); static void __init platform_detect(void) { size_t propsize; - u32 rev; + __be32 rev; if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, &propsize) || propsize != 4) { printk(KERN_ERR "ofw: getprop call failed!\n"); - rev = 0; + rev = cpu_to_be32(0); } olpc_platform_info.boardrev = be32_to_cpu(rev); } @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = be32_to_cpu(0xc2); + olpc_platform_info.boardrev = 0xc2; } #endif -- cgit v1.2.3 From 2f9284e4e3be7b0b4d8d0638f9805603069a762d Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Tue, 23 Sep 2008 22:56:35 +0200 Subject: x86, microcode_amd: cleanup, mark request_microcode_user() as unsupported (1) mark mc_size in generic_load_microcode() as unitialized_var to avoid gcc's (false) warning; (2) mark request_microcode_user() as unsupported. The required changes can be added later. Note, we don't break any user-space interfaces here, as there were no kernels with support for AMD-specific ucode update yet. The ucode has to be updated via 'firmware'. Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 829415208ff8..7a1f8eeac2c7 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -120,29 +120,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev) unsigned int equiv_cpu_id = 0x00; unsigned int i = 0; - /* - * FIXME! dimm: do we need this? Why an update via /dev/... is different - * from the one via firmware? - * - * This is a tricky part. We might be called from a write operation - * to the device file instead of the usual process of firmware - * loading. This routine needs to be able to distinguish both - * cases. This is done by checking if there alread is a equivalent - * CPU table installed. If not, we're written through - * /dev/cpu/microcode. - * Since we ignore all checks. The error case in which going through - * firmware loading and that table is not loaded has already been - * checked earlier. - */ BUG_ON(equiv_cpu_table == NULL); -#if 0 - if (equiv_cpu_table == NULL) { - printk(KERN_INFO "microcode: CPU%d microcode update with " - "version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->cpu_sig.rev); - goto out; - } -#endif current_cpu_id = cpuid_eax(0x00000001); while (equiv_cpu_table[i].installed_cpu != 0) { @@ -362,7 +340,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, leftover = size - offset; while (leftover) { - unsigned int mc_size; + unsigned int uninitialized_var(mc_size); struct microcode_header_amd *mc_header; mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); @@ -428,17 +406,11 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } -static int get_ucode_user(void *to, const void *from, size_t n) -{ - return copy_from_user(to, from, n); -} - static int request_microcode_user(int cpu, const void __user *buf, size_t size) { - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" + "is not supported\n"); + return -1; } static void microcode_fini_cpu_amd(int cpu) -- cgit v1.2.3 From 82b078659ed04e1ecdebf8326e189cf76ed361af Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 24 Sep 2008 11:50:35 +0200 Subject: x86: microcode patch loader bugfix Corrected CPU vendor check condition for AMD microcode patch loader initialization. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index ff031dbccdf6..8db2eb55e9e2 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -450,7 +450,7 @@ static int __init microcode_init(void) if (c->x86_vendor == X86_VENDOR_INTEL) microcode_ops = init_intel_microcode(); - else if (c->x86_vendor != X86_VENDOR_AMD) + else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); if (!microcode_ops) { -- cgit v1.2.3 From 8d8c13bdb53977319593d9efc4971a4cacc8bd03 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:10:29 -0700 Subject: x86: signal_32.c: introduce signr_convert() Introduce signr_convert(). This function will help unification of setup_rt_frame(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index bb05917f232c..b1bc90f19b9a 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -482,18 +482,21 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* * OK, we're invoking a handler: */ +static int signr_convert(int sig) +{ + struct thread_info *info = current_thread_info(); + + if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) + return info->exec_domain->signal_invmap[sig]; + return sig; +} + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { + int usig = signr_convert(sig); int ret; - int usig; - - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) -- cgit v1.2.3 From b94fd69827b2104a2a4d9d0bc05a8e908a937ae8 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:12:54 -0700 Subject: x86: signal_64.c: introduce helper function signr_convert() This helper function is for unification of setup_rt_frame(). No effect in binary. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 963236f2c3c1..c5d80024a8f3 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,18 +281,24 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* * OK, we're invoking a handler */ +static int signr_convert(int sig) +{ + return sig; +} + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { + int usig = signr_convert(sig); int ret; #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(sig, ka, info, set, regs); + ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else - ret = ia32_setup_frame(sig, ka, set, regs); + ret = ia32_setup_frame(usig, ka, set, regs); } else #endif ret = __setup_rt_frame(sig, ka, info, set, regs); -- cgit v1.2.3 From 455edbc423db282bb64dec2d7c8968498ea8e619 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:13:11 -0700 Subject: x86: signal: introduce helper macro is_ia32 Introduce new macro is_ia32 for unification of setup_rt_frame(). No effect in binary, compiler will optimize. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 13 +++++++++---- arch/x86/kernel/signal_64.c | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b1bc90f19b9a..cf62f70cc2a6 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -491,6 +491,8 @@ static int signr_convert(int sig) return sig; } +#define is_ia32 1 + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) @@ -499,10 +501,13 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int ret; /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(usig, ka, info, set, regs); - else - ret = __setup_frame(usig, ka, set, regs); + if (is_ia32) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = __setup_rt_frame(usig, ka, info, set, regs); + else + ret = __setup_frame(usig, ka, set, regs); + } else + ret = __setup_rt_frame(sig, ka, info, set, regs); if (ret) { force_sigsegv(sig, current); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index c5d80024a8f3..53f86d95985b 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -286,6 +286,12 @@ static int signr_convert(int sig) return sig; } +#ifdef CONFIG_IA32_EMULATION +#define is_ia32 test_thread_flag(TIF_IA32) +#else +#define is_ia32 0 +#endif + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) @@ -293,15 +299,14 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int usig = signr_convert(sig); int ret; -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) { + /* Set up the stack frame */ + if (is_ia32) { if (ka->sa.sa_flags & SA_SIGINFO) ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else ret = ia32_setup_frame(usig, ka, set, regs); } else -#endif - ret = __setup_rt_frame(sig, ka, info, set, regs); + ret = __setup_rt_frame(sig, ka, info, set, regs); if (ret) { force_sigsegv(sig, current); -- cgit v1.2.3 From 4694d2391221e14fe671602fe34ea7f24f5a561f Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:13:29 -0700 Subject: x86: signal_32.c: introduce macro ia32_setup_frame and ia32_setup_rt_frame Make 32-bit setup_rt_frame() look like 64-bit version for unification. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index cf62f70cc2a6..4337cd510f0a 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -492,6 +492,8 @@ static int signr_convert(int sig) } #define is_ia32 1 +#define ia32_setup_frame __setup_frame +#define ia32_setup_rt_frame __setup_rt_frame static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -503,9 +505,9 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up the stack frame */ if (is_ia32) { if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(usig, ka, info, set, regs); + ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else - ret = __setup_frame(usig, ka, set, regs); + ret = ia32_setup_frame(usig, ka, set, regs); } else ret = __setup_rt_frame(sig, ka, info, set, regs); -- cgit v1.2.3 From 9f6ac57729724b58df81ca5dc005326759a806fe Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:35 +0900 Subject: x86: export pci-nommu's alloc_coherent This patch exports nommu_alloc_coherent (renamed dma_generic_alloc_coherent). GART needs this function. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu.c | 39 +-------------------------------------- 2 files changed, 32 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc62..4e612d20170a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -134,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) EXPORT_SYMBOL(iommu_num_pages); #endif +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long dma_mask; + struct page *page; + dma_addr_t addr; + + dma_mask = dma_alloc_coherent_mask(dev, flag); + + flag |= __GFP_ZERO; +again: + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!page) + return NULL; + + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size)) { + __free_pages(page, get_order(size)); + + if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + return NULL; + } + + *dma_addr = addr; + return page_address(page); +} + /* * See for the iommu kernel parameter * documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 1c1c98a31d57..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,43 +72,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void * -nommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) -{ - unsigned long dma_mask; - int node; - struct page *page; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - - gfp |= __GFP_ZERO; - - node = dev_to_node(hwdev); -again: - page = alloc_pages_node(node, gfp, get_order(size)); - if (!page) - return NULL; - - addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { - free_pages((unsigned long)page_address(page), get_order(size)); - gfp |= GFP_DMA; - goto again; - } - - if (check_addr("alloc_coherent", hwdev, addr, size)) { - *dma_addr = addr; - flush_write_buffers(); - return page_address(page); - } - - free_pages((unsigned long)page_address(page), get_order(size)); - - return NULL; -} - static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr) { @@ -116,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, } struct dma_mapping_ops nommu_dma_ops = { - .alloc_coherent = nommu_alloc_coherent, + .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, -- cgit v1.2.3 From ecef533ea68b2fb3baaf459beb2f802a240bdb16 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:36 +0900 Subject: revert "x86: make GART to respect device's dma_mask about virtual mappings" This reverts: commit bee44f294efd8417f5e68553778a6cc957af1547 Author: FUJITA Tomonori Date: Fri Sep 12 19:42:35 2008 +0900 x86: make GART to respect device's dma_mask about virtual mappings I wrote the above commit to fix a GART alloc_coherent regression, that can't handle a device having dma_masks > 24bit < 32bits, introduced by the alloc_coherent rewrite: http://lkml.org/lkml/2008/8/12/200 After the alloc_coherent rewrite, GART alloc_coherent tried to allocate pages with GFP_DMA32. If GART got an address that a device can't access to, GART mapped the address to a virtual I/O address. But GART mapping mechanism didn't take account of dma mask, so GART could use a virtual I/O address that the device can't access to again. Alan pointed out: " This is indeed a specific problem found with things like older AACRAID where control blocks must be below 31bits and the GART is above 0x80000000. " The above commit modified GART mapping mechanism to take care of dma mask. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That means it's possible that GART doesn't have virtual I/O address space that a device can access to. The above commit (to modify GART mapping mechanism to take care of dma mask) can't fix the regression reliably so let's avoid making GART more complicated. We need a solution that always works for dma_masks > 24bit < 32bits. That's how GART worked before the alloc_coherent rewrite. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Acked-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9e390f1bd46a..7e08e466b8ad 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,34 +83,23 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask, u64 dma_mask) + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; - unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(iommu_pages, - DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), - dma_mask >> PAGE_SHIFT); - spin_lock_irqsave(&iommu_bitmap_lock, flags); - - if (limit <= next_bit) { - need_flush = 1; - next_bit = 0; - } - - offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1 && next_bit) { + if (offset == -1) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); } @@ -239,14 +228,12 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask, - u64 dma_mask) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page; + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; - iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -276,7 +263,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); + bus = dma_map_area(dev, paddr, size, dir, 0); flush_gart(); return bus; @@ -327,7 +314,6 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; - u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -337,8 +323,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0, - dma_mask); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -360,16 +345,14 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start; - unsigned long iommu_page; + unsigned long iommu_start = alloc_iommu(dev, pages, 0); + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; - iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; - iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -522,7 +505,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask, dma_mask); + align_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From 1d990882153f36723f9e8717c4401689e64c7a36 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:37 +0900 Subject: x86: restore old GART alloc_coherent behavior Currently, GART alloc_coherent tries to allocate pages with GFP_DMA32 for a device having dma_masks > 24bit < 32bits. If GART gets an address that a device can't access to, GART try to map the address to a virtual I/O address that the device can access to. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That is, it's possible that GART doesn't have virtual I/O address space that a device can access to. The above behavior doesn't work for a device having dma_masks > 24bit < 32bits. This patch restores old GART alloc_coherent behavior (before the alloc_coherent rewrite). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 7e08e466b8ad..25c94fb96d74 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -487,31 +487,28 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { - void *vaddr; dma_addr_t paddr; unsigned long align_mask; - u64 dma_mask = dma_alloc_coherent_mask(dev, flag); - - vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (!vaddr) - return NULL; - - paddr = virt_to_phys(vaddr); - if (is_buffer_dma_capable(dma_mask, paddr, size)) { - *dma_addr = paddr; - return vaddr; - } - - align_mask = (1UL << get_order(size)) - 1; - - *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask); - flush_gart(); - - if (*dma_addr != bad_dma_address) - return vaddr; - - free_pages((unsigned long)vaddr, get_order(size)); + struct page *page; + + if (force_iommu && !(flag & GFP_DMA)) { + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); + if (!page) + return NULL; + + align_mask = (1UL << get_order(size)) - 1; + paddr = dma_map_area(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, align_mask); + + flush_gart(); + if (paddr != bad_dma_address) { + *dma_addr = paddr; + return page_address(page); + } + __free_pages(page, get_order(size)); + } else + return dma_generic_alloc_coherent(dev, size, dma_addr, flag); return NULL; } -- cgit v1.2.3 From 1615965e54eb94d7bcd298d2163739bd79f602d4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 22:41:10 +0900 Subject: x86 gart: remove unnecessary initialization There is no point to have such initialization in struct dma_mapping_ops. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 25c94fb96d74..b85a2f9bb340 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -715,12 +715,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, -- cgit v1.2.3 From f6476774f1fe32593d3d71903b1e98514efbf685 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 24 Sep 2008 14:35:17 -0400 Subject: x86_64: be less annoying on boot Remove mostly useless message on every boot. Signed-off-by: Bill Nottingham Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9bfc4d72fb2e..11aa501c9f4f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -112,8 +112,6 @@ void __init x86_64_start_kernel(char * real_mode_data) x86_64_init_pda(); - early_printk("Kernel really alive\n"); - x86_64_start_reservations(real_mode_data); } -- cgit v1.2.3 From d7161a65341556bacb5e6654e133803f46f51063 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:41 -0500 Subject: kgdb, x86, arm, mips, powerpc: ignore user space single stepping On the x86 arch, user space single step exceptions should be ignored if they occur in the kernel space, such as ptrace stepping through a system call. First check if it is kgdb that is executing a single step, then ensure it is not an accidental traversal into the user space, while in kgdb, any other time the TIF_SINGLESTEP is set, kgdb should ignore the exception. On x86, arm, mips and powerpc, the kgdb_contthread usage was inconsistent with the way single stepping is implemented in the kgdb core. The arch specific stub should always set the kgdb_cpu_doing_single_step correctly if it is single stepping. This allows kgdb to correctly process an instruction steps if ptrace happens to be requesting an instruction step over a system call. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b8..00f7896c9a19 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -378,10 +378,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; kgdb_single_step = 1; - if (kgdb_contthread) { - atomic_set(&kgdb_cpu_doing_single_step, - raw_smp_processor_id()); - } + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); } get_debugreg(dr6, 6); @@ -466,9 +464,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id() && - user_mode(regs)) - return single_step_cont(regs, args); + raw_smp_processor_id()) { + if (user_mode(regs)) + return single_step_cont(regs, args); + break; + } else if (test_thread_flag(TIF_SINGLESTEP)) + /* This means a user thread is single stepping + * a system call which should be ignored + */ + return NOTIFY_DONE; /* fall through */ default: if (user_mode(regs)) -- cgit v1.2.3 From 703a1edcd1534468fc18f733c03bd91a65c8c6f0 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:42 -0500 Subject: kgdb, x86_64: fix PS CS SS registers in gdb serial On x86_64 the gdb serial register structure defines the PS (also known as eflags), CS and SS registers as 4 bytes entities. This patch splits the x86_64 regnames enum into a 32 and 64 version to account for the 32 bit entities in the gdb serial packets. Also the program counter is properly filled in for the sleeping threads. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00f7896c9a19..8282a2139681 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -69,6 +69,9 @@ static int gdb_x86vector = -1; */ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = regs->ax; gdb_regs[GDB_BX] = regs->bx; gdb_regs[GDB_CX] = regs->cx; @@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SI] = regs->si; gdb_regs[GDB_DI] = regs->di; gdb_regs[GDB_BP] = regs->bp; - gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_PC] = regs->ip; #ifdef CONFIG_X86_32 + gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; @@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_R13] = regs->r13; gdb_regs[GDB_R14] = regs->r14; gdb_regs[GDB_R15] = regs->r15; + gdb_regs32[GDB_PS] = regs->flags; + gdb_regs32[GDB_CS] = regs->cs; + gdb_regs32[GDB_SS] = regs->ss; #endif gdb_regs[GDB_SP] = regs->sp; } @@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) */ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = 0; gdb_regs[GDB_BX] = 0; gdb_regs[GDB_CX] = 0; @@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; #else - gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); - gdb_regs[GDB_PC] = 0; + gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_CS] = __KERNEL_CS; + gdb_regs32[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_PC] = p->thread.ip; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; @@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) */ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif regs->ax = gdb_regs[GDB_AX]; regs->bx = gdb_regs[GDB_BX]; regs->cx = gdb_regs[GDB_CX]; @@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->si = gdb_regs[GDB_SI]; regs->di = gdb_regs[GDB_DI]; regs->bp = gdb_regs[GDB_BP]; - regs->flags = gdb_regs[GDB_PS]; regs->ip = gdb_regs[GDB_PC]; #ifdef CONFIG_X86_32 + regs->flags = gdb_regs[GDB_PS]; regs->ds = gdb_regs[GDB_DS]; regs->es = gdb_regs[GDB_ES]; regs->cs = gdb_regs[GDB_CS]; @@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->r13 = gdb_regs[GDB_R13]; regs->r14 = gdb_regs[GDB_R14]; regs->r15 = gdb_regs[GDB_R15]; + regs->flags = gdb_regs32[GDB_PS]; + regs->cs = gdb_regs32[GDB_CS]; + regs->ss = gdb_regs32[GDB_SS]; #endif } -- cgit v1.2.3 From 7fc2368d1d0dce7a778beb2fba3acac8fa7a34b6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:07 -0700 Subject: x86: don't need to go to chunksize to 4G change back chunksize max to 2g otherwise will get strange layout in 2G ram system like 0 - 4g WB, 2040M - 2048M UC, 2048M - 4G NC instead of 0 - 2g WB, 2040M - 2048M UC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a564..cc135572882a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1155,10 +1155,10 @@ struct mtrr_cleanup_result { /* * gran_size: 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 4G - * so we need (2+13)*6 + * chunk size: gran_size, ..., 2G + * so we need (1+12)*6 */ -#define NUM_RESULT 90 +#define NUM_RESULT 78 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1276,7 +1276,7 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { - for (chunk_size = gran_size; chunk_size < (1ULL<<33); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; -- cgit v1.2.3 From 2313c2793d290a8cc37c428f8622c53f3fe1d6dc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:08 -0700 Subject: x86: mtrr_cleanup optimization, v2 fix hpa's t61 with 4g ram: change layout from (n - 1)*chunksize + chunk_size - NC to n*chunksize - NC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 79 +++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index cc135572882a..93d575ac61a3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -970,6 +970,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + + /* no increase */ if (range0_sizek == state->range_sizek) { if (debug_print) printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", @@ -980,13 +982,13 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, return 0; } - range0_sizek -= chunk_sizek; - if (range0_sizek && sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; - if (!range0_sizek) - break; - } + /* only cut back, when it is not the last */ + if (sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } if (range0_sizek) { @@ -1000,46 +1002,39 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } range_basek = range0_basek + range0_sizek; - range_sizek = chunk_sizek; - if (range_basek + range_sizek > basek && - range_basek + range_sizek <= (basek + sizek)) { - /* one hole */ - second_basek = basek; - second_sizek = range_basek + range_sizek - basek; - } + /* one hole in the middle */ + if (range_basek > basek && range_basek <= (basek + sizek)) + second_sizek = range_basek - basek; - /* if last piece, only could one hole near end */ - if ((second_basek || !basek) && - range_sizek - (state->range_sizek - range0_sizek) - second_sizek < - (chunk_sizek >> 1)) { - /* - * one hole in middle (second_sizek is 0) or at end - * (second_sizek is 0 ) - */ - hole_sizek = range_sizek - (state->range_sizek - range0_sizek) - - second_sizek; - hole_basek = range_basek + range_sizek - hole_sizek - - second_sizek; - } else { - /* fallback for big hole, or several holes */ + if (range0_sizek > state->range_sizek) { + unsigned long hole_basek, hole_sizek; + + /* one hole in middle or at end */ + hole_sizek = range0_sizek - state->range_sizek - second_sizek; + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + } else { + /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - second_basek = 0; - second_sizek = 0; - } - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + if (range_sizek) { + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); - if (hole_sizek) { - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, - MTRR_TYPE_UNCACHABLE); - + } } return second_sizek; -- cgit v1.2.3 From 54d45ff4208836e62536fc40b0141586dbf6641f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:06 -0700 Subject: x86: add mtrr_cleanup_debug command line add mtrr_cleanup_debug to print out more info about layout Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 93d575ac61a3..aff46b99ff01 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -836,6 +836,13 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +static int __init mtrr_cleanup_debug_setup(char *str) +{ + debug_print = 1; + return 0; +} +early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; @@ -1227,7 +1234,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; - debug_print = 1; + debug_print++; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1263,7 +1270,7 @@ static int __init mtrr_cleanup(unsigned address_bits) } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); - debug_print = 0; + debug_print--; memset(result, 0, sizeof(result[0])); } @@ -1366,8 +1373,9 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; - debug_print = 1; + debug_print++; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + debug_print--; set_var_mtrr_all(address_bits); return 1; } -- cgit v1.2.3 From 237a62247c2879331986a300d6ab36ad21264c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:53 +0200 Subject: x86/iommu: make GART driver checkpatch clean Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b85a2f9bb340..aa569db73a2a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -27,8 +27,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -175,7 +175,8 @@ static void dump_leak(void) iommu_leak_pages); for (i = 0; i < iommu_leak_pages; i += 2) { printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); + printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], + 0); printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); } printk(KERN_DEBUG "\n"); @@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) if (!error) error = sysdev_register(&device_gart); if (error) - panic("Could not register gart_sysdev -- would corrupt data on next suspend"); + panic("Could not register gart_sysdev -- " + "would corrupt data on next suspend"); flush_gart(); @@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -extern int agp_amd64_init(void); - static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, @@ -777,8 +777,8 @@ void __init gart_iommu_init(void) (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "but GART IOMMU not available.\n"); + printk(KERN_WARNING "falling back to iommu=soft.\n"); } return; } @@ -868,7 +868,8 @@ void __init gart_parse_options(char *p) if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; - if (*p == '=') ++p; + if (*p == '=') + ++p; if (isdigit(*p) && get_option(&p, &arg)) iommu_leak_pages = arg; } -- cgit v1.2.3 From 3610f2116e961cdcbd3546a3828470f7aa636212 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:54 +0200 Subject: x86/iommu: convert GART need_flush to bool Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aa569db73a2a..aecea068f583 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static bool need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, unsigned long align_mask) @@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); if (offset == -1) { - need_flush = 1; + need_flush = true; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); @@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size, next_bit = offset+size; if (next_bit >= iommu_pages) { next_bit = 0; - need_flush = 1; + need_flush = true; } } if (iommu_fullflush) - need_flush = 1; + need_flush = true; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); return offset; @@ -136,7 +136,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); - need_flush = 0; + need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From 0114267be1bebc2e9c913b19579900153583d617 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:42:12 +0200 Subject: x86/iommu: use __GFP_ZERO instead of memset for GART Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aecea068f583..d077116fec1b 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) info->aper_size = aper_size >> 20; gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(gatt_size)); if (!gatt) panic("Cannot allocate GATT table"); if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) panic("Could not set GART PTEs to uncacheable pages"); - memset(gatt, 0, gatt_size); agp_gatt_table = gatt; enable_gart_translations(); @@ -788,19 +788,16 @@ void __init gart_iommu_init(void) iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(iommu_pages/8)); if (!iommu_gart_bitmap) panic("Cannot allocate iommu bitmap\n"); - memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); - else + if (!iommu_leak_tab) printk(KERN_DEBUG "PCI-DMA: Cannot allocate leak trace area\n"); } -- cgit v1.2.3 From 8f0afaa58e912bbe7d5b0bad9fb024337edf363e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 20:26:06 -0700 Subject: x86: mtrr_cleanup hole size should be less than half of chunk_size, v2 v2: should check with half of range0 size instead of chunk_size So don't have silly big hole. in hpa's case we could auto detect instead of adding mtrr_chunk_size in command line. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 74 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index aff46b99ff01..bccf57f5b615 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -992,22 +992,17 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* only cut back, when it is not the last */ if (sizek) { while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; + if (range0_sizek >= chunk_sizek) + range0_sizek -= chunk_sizek; + else + range0_sizek = 0; + if (!range0_sizek) break; } } - if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK); - - } - +second_try: range_basek = range0_basek + range0_sizek; /* one hole in the middle */ @@ -1015,33 +1010,50 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_sizek = range_basek - basek; if (range0_sizek > state->range_sizek) { - unsigned long hole_basek, hole_sizek; /* one hole in middle or at end */ hole_sizek = range0_sizek - state->range_sizek - second_sizek; - if (hole_sizek) { - hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, - MTRR_TYPE_UNCACHABLE); + + /* hole size should be less than half of range0 size */ + if (hole_sizek > (range0_sizek >> 1) && + range0_sizek >= chunk_sizek) { + range0_sizek -= chunk_sizek; + second_sizek = 0; + hole_sizek = 0; + + goto second_try; } - } else { + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + } + + if (range0_sizek < state->range_sizek) { /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - if (range_sizek) { - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, - MTRR_TYPE_WRBACK); - } + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); + } + + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE); } return second_sizek; -- cgit v1.2.3 From 12544697f12e0ecdcf971075415c7678fae502af Mon Sep 17 00:00:00 2001 From: dcg Date: Sun, 28 Sep 2008 18:49:46 +0200 Subject: x86_64: be less annoying on boot, v2 Honour "quiet" boot parameter in early_printk() calls Signed-off-by: Diego Calleja Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 11aa501c9f4f..d16084f90649 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -108,7 +108,8 @@ void __init x86_64_start_kernel(char * real_mode_data) } load_idt((const struct desc_ptr *)&idt_descr); - early_printk("Kernel alive\n"); + if (console_loglevel == 10) + early_printk("Kernel alive\n"); x86_64_init_pda(); -- cgit v1.2.3 From 73436a1d2501575f9c2e6ddb26889145e23cefd8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 13:39:17 -0700 Subject: x86: mtrr_cleanup safe to get more spare regs now Delay exit to make sure we can actually get the optimal result in as many cases as possible. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index bccf57f5b615..ae0ca97e20bd 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1353,10 +1353,8 @@ static int __init mtrr_cleanup(unsigned address_bits) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) { + if (!min_loss_pfn[i]) num_reg_good = i; - break; - } } index_good = -1; -- cgit v1.2.3 From dd7e52224fd7438d701b4bb9834a9ddc06828210 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:11 -0700 Subject: x86: mtrr_cleanup prepare to make gran_size to less 1M make the print out right with size < 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 103 ++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ae0ca97e20bd..b1bd1038c913 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -905,6 +905,27 @@ set_var_mtrr_all(unsigned int address_bits) } } +static unsigned long to_size_factor(unsigned long sizek, char *factorp) +{ + char factor; + unsigned long base = sizek; + + if (base & ((1<<10) - 1)) { + /* not MB alignment */ + factor = 'K'; + } else if (base & ((1<<20) - 1)){ + factor = 'M'; + base >>= 10; + } else { + factor = 'G'; + base >>= 20; + } + + *factorp = factor; + + return base; +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, unsigned char type) @@ -926,13 +947,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - if (debug_print) + if (debug_print) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + start_base = to_size_factor(range_startk, &start_factor), + size_base = to_size_factor(sizek, &size_factor), + printk(KERN_DEBUG "Setting variable MTRR %d, " - "base: %ldMB, range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, + "base: %ld%cB, range: %ld%cB, type %s\n", + reg, start_base, start_factor, + size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); + } save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -1245,6 +1274,8 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; debug_print++; /* convert ranges to var ranges state */ @@ -1270,12 +1301,15 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + lose_base, lose_factor); if (!result[i].bad) { set_var_mtrr_all(address_bits); return 1; @@ -1290,14 +1324,25 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + char gran_factor; + unsigned long gran_base; + + if (debug_print) + gran_base = to_size_factor(gran_size >> 10, &gran_factor); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; - if (debug_print) - printk(KERN_INFO - "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + if (debug_print) { + char chunk_factor; + unsigned long chunk_base; + + chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), + printk(KERN_INFO "\n"); + printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", + gran_base, gran_factor, chunk_base, chunk_factor); + } if (i >= NUM_RESULT) continue; @@ -1340,12 +1385,18 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", - result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad?"-":"", + lose_base, lose_factor); } /* try to find the optimal index */ @@ -1370,14 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) } if (index_good != -1) { + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", - result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", - result[i].num_reg, - result[i].lose_cover_sizek >> 10); + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", + result[i].num_reg, lose_base, lose_factor); /* convert ranges to var ranges state */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; -- cgit v1.2.3 From 4624065731751a3ace88e5824d8e5654e2d7abd3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:12 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M one have gran < 1M reg00: base=0xd8000000 (3456MB), size= 128MB: uncachable, count=1 reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1 reg02: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1 reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1 reg05: base=0xd7f80000 (3455MB), size= 512KB: uncachable, count=1 will get Found optimal setting for mtrr clean up gran_size: 512K chunk_size: 2M num_reg: 7 lose RAM: 0G range0: 0000000000000000 - 00000000d8000000 Setting variable MTRR 0, base: 0GB, range: 2GB, type WB Setting variable MTRR 1, base: 2GB, range: 1GB, type WB Setting variable MTRR 2, base: 3GB, range: 256MB, type WB Setting variable MTRR 3, base: 3328MB, range: 128MB, type WB hole: 00000000d7f00000 - 00000000d7f80000 Setting variable MTRR 4, base: 3455MB, range: 512KB, type UC rangeX: 0000000100000000 - 0000000128000000 Setting variable MTRR 5, base: 4GB, range: 512MB, type WB Setting variable MTRR 6, base: 4608MB, range: 128MB, type WB so start from 64k instead of 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b1bd1038c913..8f1a342b16b7 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1197,11 +1197,11 @@ struct mtrr_cleanup_result { }; /* - * gran_size: 1M, 2M, ..., 2G + * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G * chunk size: gran_size, ..., 2G - * so we need (1+12)*6 + * so we need (1+16)*8 */ -#define NUM_RESULT 78 +#define NUM_RESULT 136 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1323,7 +1323,7 @@ static int __init mtrr_cleanup(unsigned address_bits) i = 0; memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { char gran_factor; unsigned long gran_base; -- cgit v1.2.3 From 3dcd7e269d2223126f6ee9bc893f5a6166e1770d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Tue, 30 Sep 2008 10:02:52 +0200 Subject: x86: fix typo in enable_mtrr_cleanup early parameter Correct typo for 'enable_mtrr_cleanup' early boot param name. Signed-off-by: J.A. Magallon Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a564..885c8265e6b5 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) enable_mtrr_cleanup = 1; return 0; } -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); struct var_mtrr_state { unsigned long range_startk; -- cgit v1.2.3 From 9b1568458a3ef006361710dc12848aec891883b5 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 29 Sep 2008 14:52:03 -0400 Subject: x86, debug printouts: IOMMU setup failures should not be KERN_ERR The number of BIOSes that have an option to enable the IOMMU, or fix anything about its configuration, is vanishingly small. There's no good reason to punish quiet boot for this. Signed-off-by: Adam Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 44e21826db11..9a32b37ee2ee 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -455,11 +455,11 @@ out: force_iommu || valid_agp || fallback_aper_force) { - printk(KERN_ERR + printk(KERN_INFO "Your BIOS doesn't leave a aperture memory hole\n"); - printk(KERN_ERR + printk(KERN_INFO "Please enable the IOMMU option in the BIOS setup\n"); - printk(KERN_ERR + printk(KERN_INFO "This costs you %d MB of RAM\n", 32 << fallback_aper_order); -- cgit v1.2.3 From de59985e3a623d4d5d6207f1777398ca0606ab1c Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86: Fix broken LDT access in VMI After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be Signed-off-by: Zachary Amsden Cc: Ingo Molnar Cc: Glauber de Oliveira Costa Cc: Signed-off-by: Linus Torvalds --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db54..edfb09f30479 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From dc63b52673d71f9d49b9d72d263a9f32df18c3ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86, vmi: fix broken LDT access This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. Signed-off-by: Zachary Amsden Cc: Glauber de Oliveira Costa Cc: stable@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db54..edfb09f30479 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From 40becd8d5af03ee7935e79c3fccd0d1f380d95b4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 29 Sep 2008 00:06:36 +0900 Subject: AMD IOMMU: use iommu_device_max_index AMD IOMMU can use iommu_device_max_index() instead of the homegrown function. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c19212191c98..3b346c6f5514 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -470,10 +470,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * efficient allocator. * ****************************************************************************/ -static unsigned long dma_mask_to_pages(unsigned long mask) -{ - return PAGE_ALIGN(mask) >> PAGE_SHIFT; -} /* * The address allocator core function. @@ -486,14 +482,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(dma_mask); + unsigned long limit; unsigned long address; - unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = limit < size ? limit : size; + limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, + dma_mask >> PAGE_SHIFT); if (dom->next_bit >= limit) { dom->next_bit = 0; -- cgit v1.2.3 From fd1452ebf257317f24e0e285a17a2ec2ce3e6df7 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 2 Oct 2008 16:56:19 +0300 Subject: x86/microcode: fix sleeping function called from invalid context at kernel/mutex.c Fix the following "sleeping function called from invalid context" bug: ... __might_sleep mutex_lock_nested microcode_update_cpu mc_sysdev_resume __sysdev_resume sysdev_resume device_power_up ... Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 8db2eb55e9e2..936d8d55f230 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -324,10 +324,6 @@ void microcode_update_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; int err = 0; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - - mutex_lock(µcode_mutex); /* * Check if the system resume is in progress (uci->valid != NULL), * otherwise just request a firmware: @@ -340,11 +336,8 @@ void microcode_update_cpu(int cpu) err = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); } - if (!err) microcode_ops->apply_microcode(cpu); - - mutex_unlock(µcode_mutex); } static void microcode_init_cpu(int cpu) @@ -352,7 +345,13 @@ static void microcode_init_cpu(int cpu) cpumask_t old = current->cpus_allowed; set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); microcode_update_cpu(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); } -- cgit v1.2.3 From 136c82c6f7f12c9bed8bf709f92123077966512c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Fri, 3 Oct 2008 00:32:37 +0200 Subject: x86: mtrr_cleanup try gran_size to less than 1M, cleanup Patch below cleans up formatting, with space for big bases and sizes (64 Gb). Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb3715..4c4214690dd1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) } /* RED-PEN: base can be > 32bit */ len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), mtrr_usage_table[i]); + mtrr_usage_table[i], mtrr_attrib_to_str(type)); } } return 0; -- cgit v1.2.3 From 834836ee6a3a9deadff9394b23db965a08bcde35 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 15:46:20 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M, v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit J.A. Magallón reported: >> Also, on a 64 bit box with 4Gb, it gives this: >> >> cicely:~# cat /proc/mtrr >> reg00: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 >> reg01: base=0x100000000 (4096MB), size=1024MB: write-back, count=1 >> reg02: base=0x140000000 (5120MB), size= 512MB: write-back, count=1 >> reg03: base=0x160000000 (5632MB), size= 256MB: write-back, count=1 >> reg04: base=0x80000000 (2048MB), size=2048MB: uncachable, count=1 boundary handling has a problem ... fix it. Reported-by: J.A. Magallón Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8f1a342b16b7..b4a3f0c1a5e3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1044,7 +1044,7 @@ second_try: hole_sizek = range0_sizek - state->range_sizek - second_sizek; /* hole size should be less than half of range0 size */ - if (hole_sizek > (range0_sizek >> 1) && + if (hole_sizek >= (range0_sizek >> 1) && range0_sizek >= chunk_sizek) { range0_sizek -= chunk_sizek; second_sizek = 0; -- cgit v1.2.3 From 8bb39311bf461243f6cade3644764d848516dd23 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 23:26:59 -0700 Subject: x86, debug: mtrr_cleanup print out var mtrr before change it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b4a3f0c1a5e3..406074c46f1e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1211,13 +1211,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; - unsigned long i, base, size, def, dummy; + unsigned long base, size, def, dummy; mtrr_type type; int nr_range, nr_range_new; u64 chunk_size, gran_size; unsigned long range_sums, range_sums_new; int index_good; int num_reg_good; + int i; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1259,6 +1260,28 @@ static int __init mtrr_cleanup(unsigned address_bits) num_var_ranges - num[MTRR_NUM_TYPES]) return 0; + /* print original var MTRRs at first, for debugging: */ + printk(KERN_DEBUG "original variable MTRRs\n"); + for (i = 0; i < num_var_ranges; i++) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); + if (!size_base) + continue; + + size_base = to_size_factor(size_base, &size_factor), + start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); + start_base = to_size_factor(start_base, &start_factor), + + printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + i, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ); + } + memset(range, 0, sizeof(range)); extra_remove_size = 0; if (mtrr_tom2) { -- cgit v1.2.3 From a2e8d3dcfd420177aaa0c53aca60a869bad75f4b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 2 Oct 2008 22:09:20 -0700 Subject: x86: signal: move macros out from restore_sigcontext() move macros, COPY, COPY_SEG*, GET_SEG, out from restore_sigcontext(). x86_64: introduce COPY_SEG_STRICT for cs. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 38 +++++++++++++++++++++----------------- arch/x86/kernel/signal_64.c | 18 +++++++++++------- 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 4337cd510f0a..545448b7aeba 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -113,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx) return do_sigaltstack(uss, uoss, regs->sp); } +#define COPY(x) { \ + err |= __get_user(regs->x, &sc->x); \ +} + +#define COPY_SEG(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp; \ +} + +#define COPY_SEG_STRICT(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp | 3; \ +} + +#define GET_SEG(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg, tmp); \ +} /* * Do a signal return; undo the signal stack. @@ -126,23 +147,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) - -#define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->seg = tmp; } - -#define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->seg = tmp|3; } - -#define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg, tmp); } - GET_SEG(gs); COPY_SEG(fs); COPY_SEG(es); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 53f86d95985b..feff4a91d095 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } +#define COPY(x) { \ + err |= __get_user(regs->x, &sc->x); \ +} + +#define COPY_SEG_STRICT(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp | 3; \ +} + /* * Do a signal return; undo the signal stack. */ @@ -64,8 +74,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) (err |= __get_user(regs->x, &sc->x)) - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); COPY(r8); @@ -80,11 +88,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Kernel saves and restores only the CS segment register on signals, * which is the bare minimum needed to allow mixed 32/64-bit code. * App's signal handler can save/restore other segments if needed. */ - { - unsigned cs; - err |= __get_user(cs, &sc->cs); - regs->cs = cs | 3; /* Force into user mode */ - } + COPY_SEG_STRICT(cs); { unsigned int tmpflags; -- cgit v1.2.3 From 69e13ad56f9e2cd81c4f8bfd6267211c10c14c08 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 2 Oct 2008 22:18:47 -0700 Subject: x86: signal: remove indent in restore_sigcontext() remove braces and indent for flags and fpstate in restore_sigcontext(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 21 +++++++-------------- arch/x86/kernel/signal_64.c | 19 +++++++------------ 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 545448b7aeba..d6dd057d0f22 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -142,6 +142,8 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *pax) { + void __user *buf; + unsigned int tmpflags; unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ @@ -156,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, COPY_SEG_STRICT(cs); COPY_SEG_STRICT(ss); - { - unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | - (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ - } - - { - void __user *buf; - - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); - } + err |= __get_user(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); err |= __get_user(*pax, &sc->ax); return err; diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index feff4a91d095..a5c9627f4db9 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -69,6 +69,8 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *pax) { + void __user *buf; + unsigned int tmpflags; unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ @@ -90,19 +92,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, * App's signal handler can save/restore other segments if needed. */ COPY_SEG_STRICT(cs); - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ - } - - { - void __user *buf; + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); - } + err |= __get_user(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); err |= __get_user(*pax, &sc->ax); return err; -- cgit v1.2.3 From 175e438f7a2de9d94110046be48697969569736a Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Thu, 2 Oct 2008 17:32:06 -0500 Subject: x86: trivial printk fix in efi.c [patch] x86: Trivial printk fix in efi.c The following line is lacking a space between "memdesc" and "doesn't". "Kernel-defined memdescdoesn't match the one from EFI!" Fixed the printk by adding a space. Signed-off-by: Russ Anderson Cc: Russ Anderson Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b1..945a31cdd81f 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -414,9 +414,11 @@ void __init efi_init(void) if (memmap.map == NULL) printk(KERN_ERR "Could not map the EFI memory map!\n"); memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + if (memmap.desc_size != sizeof(efi_memory_desc_t)) - printk(KERN_WARNING "Kernel-defined memdesc" - "doesn't match the one from EFI!\n"); + printk(KERN_WARNING + "Kernel-defined memdesc doesn't match the one from EFI!\n"); + if (add_efi_memmap) do_add_efi_memmap(); -- cgit v1.2.3 From 42fde7a05c5d6dc76e518ae12091ea897b1c132b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 19:34:18 -0700 Subject: x86: mtrr_cleanup: print out correct type v2 Print out the correct type when the Write Protected (WP) type is seen. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 406074c46f1e..9086b38fbabe 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1273,12 +1273,14 @@ static int __init mtrr_cleanup(unsigned address_bits) size_base = to_size_factor(size_base, &size_factor), start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); start_base = to_size_factor(start_base, &start_factor), + type = range_state[i].type; printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ((type == MTRR_TYPE_WRPROT) ? "WP" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) ); } -- cgit v1.2.3 From 99e1aa17ce434010dd820b583628370cc15f10f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:32 -0700 Subject: x86: mtrr_cleanup: first 1M may be covered in var mtrrs The first 1M is don't care when it comes to the variables MTRRs. Cover it as WB as a heuristic approximation; this is generally what we want to minimize the number of registers. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9086b38fbabe..663e530e08e0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1293,6 +1293,15 @@ static int __init mtrr_cleanup(unsigned address_bits) } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + /* + * [0, 1M) should always be coverred by var mtrr with WB + * and fixed mtrrs should take effective before var mtrr for it + */ + nr_range = add_range_with_merge(range, nr_range, 0, + (1ULL<<(20 - PAGE_SHIFT)) - 1); + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM coverred: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); -- cgit v1.2.3 From dd5523552c2897e3fde16fc2fc8f6332addf66ab Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:33 -0700 Subject: x86: mtrr_cleanup: treat WRPROT as UNCACHEABLE For the purpose of MTRR canonicalization, treat WRPROT as UNCACHEABLE. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 663e530e08e0..5994a9f78f3d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE) + if (type != MTRR_TYPE_UNCACHABLE && + type != MTRR_TYPE_WRPROT) continue; size = range_state[i].size_pfn; if (!size) @@ -1248,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) continue; if (!size) type = MTRR_NUM_TYPES; + if (type == MTRR_TYPE_WRPROT) + type = MTRR_TYPE_UNCACHABLE; num[type]++; } -- cgit v1.2.3 From d99e90164e6cf2eb85fa94d547d6336f8127a107 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 15:55:12 -0700 Subject: x86: gart iommu have direct mapping when agp is present too move init_memory_mapping() out of init_k8_gatt. for: http://bugzilla.kernel.org/show_bug.cgi?id=11676 2.6.27-rc2 to rc8, apgart fails, iommu=soft works, regression This is needed because we need to map the GART aperture even if the GATT is not initialized. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d5..be33a5442d82 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -626,7 +626,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) struct pci_dev *dev; void *gatt; int i, error; - unsigned long start_pfn, end_pfn; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -672,12 +671,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); - /* need to map that range */ - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<> PAGE_SHIFT; -- cgit v1.2.3 From e84956f92a846246b09b34f2a728329c386d250f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 11:59:29 +0200 Subject: x86 ACPI: Blacklist two HP machines with buggy BIOSes There is a bug in the BIOSes of some HP boxes with AMD Turions which connects IO-APIC pins with ACPI thermal trip points in such a way that if the state of the IO-APIC is not as expected by the (buggy) BIOS, the thermal trip points are set to insanely low values (usually all of them become 16 degrees Celsius). As a result, thermal throttling kicks in and knock the system down to its shoes. Unfortunately some of the recent IO-APIC changes made the bug show up. To prevent this from happening, blacklist machines that are known to be affected (nx6115 and 6715b in this particular case). This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11516 listed as a regression from 2.6.26. On my box it was caused by: commit 691874fa96d6349a8b60f8ea9c2bae52ece79941 Author: Maciej W. Rozycki Date: Tue May 27 21:19:51 2008 +0100 x86: I/O APIC: timer through 8259A second-chance Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar and the whole story is described in this (huge) thread: http://marc.info/?l=linux-kernel&m=121358440508410&w=4 Matthew Garrett told us about that happening on the nx6125: http://marc.info/?l=linux-kernel&m=121396307411930&w=4 and then Maciej analysed the breakage on the basis of a DSDT from the nx6325: http://marc.info/?l=linux-kernel&m=121401068718826&w=4 As far as the Dmitry's and Jason's boxes are concerned, I recognized the symptoms and asked them to verify that the blacklisting helped. It appears that the buggy BIOS code has been copy-pasted to the entire range of machines, for no good reason. Signed-off-by: Rafael J. Wysocki Tested-by: Dmitry Torokhov Tested-by: Jason Vas Dias Signed-off-by: Linus Torvalds --- arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bfd10fd211cd..c102af85df9c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1603,6 +1603,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * is not connected at all. Force ignoring BIOS IRQ0 pin2 * override in that cases. */ + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP nx6115 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), + }, + }, { .callback = dmi_ignore_irq0_timer_override, .ident = "HP NX6125 laptop", @@ -1619,6 +1627,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP 6715b laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, {} }; -- cgit v1.2.3 From e85ceae9102f6e3c1d707e7ac88fa48d252e9cfa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 6 Oct 2008 13:50:59 -0500 Subject: kgdb, x86: Avoid invoking kgdb_nmicallback twice per NMI Stress-testing KVM's latest NMI support with kgdbts inside an SMP guest, I came across spurious unhandled NMIs while running the singlestep test. Looking closer at the code path each NMI takes when KGDB is enabled, I noticed that kgdb_nmicallback is called twice per event: One time via DIE_NMI_IPI notification, the second time on DIE_NMI. Removing the first invocation cures the unhandled NMIs here. Signed-off-by: Jan Kiszka Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8282a2139681..10435a120d22 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -455,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; case DIE_NMI_IPI: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - } + /* Just ignore, we will handle the roundup on DIE_NMI. */ return NOTIFY_DONE; case DIE_NMIUNKNOWN: -- cgit v1.2.3 From 33fb0e4eb53f16af312f9698f974e2e64af39c12 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 7 Oct 2008 00:11:22 +0200 Subject: x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC On some HP nx6... laptops (e.g. nx6325) BIOS reports an IRQ0 override but the SB450 chipset is configured such that timer interrupts goe to INT0 of IOAPIC. Check IRQ0 routing and if it is routed to INT0 of IOAPIC skip the timer override. [ This more generic PCI ID based quirk should alleviate the need for dmi_ignore_irq0_timer_override DMI quirks. ] Signed-off-by: Andreas Herrmann Acked-by: "Maciej W. Rozycki" Tested-by: Dmitry Torokhov Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..6b839b147644 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func) } +static u32 ati_ixp4x0_rev(int num, int slot, int func) +{ + u32 d; + u8 b; + + b = read_pci_config_byte(num, slot, func, 0xac); + b &= ~(1<<5); + write_pci_config_byte(num, slot, func, 0xac, b); + + d = read_pci_config(num, slot, func, 0x70); + d |= 1<<8; + write_pci_config(num, slot, func, 0x70, d); + + d = read_pci_config(num, slot, func, 0x8); + d &= 0xff; + return d; +} + +static void __init ati_bugs(int num, int slot, int func) +{ +#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC) + u32 d; + u8 b; + + if (acpi_use_timer_override) + return; + + d = ati_ixp4x0_rev(num, slot, func); + if (d < 0x82) + acpi_skip_timer_override = 1; + else { + /* check for IRQ0 interrupt swap */ + outb(0x72, 0xcd6); b = inb(0xcd7); + if (!(b & 0x2)) + acpi_skip_timer_override = 1; + } + + if (acpi_skip_timer_override) { + printk(KERN_INFO "SB4X0 revision 0x%x\n", d); + printk(KERN_INFO "Ignoring ACPI timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + } +#endif +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +160,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, + { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, {} }; -- cgit v1.2.3 From 8d89adf44cf750e49691ba5b744b2ad77a05e997 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Oct 2008 06:47:52 +0200 Subject: x86: SB450: deprioritize DMI quirks This PCI ID based quick should be a full solution for the IRQ0 override related slowdown problem on SB450 based systems: 33fb0e4: x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC Emit a warning in those cases where the DMI quirk triggers but the PCI ID based quirk didnt. If this warning does not trigger then we can phase out the DMI quirks. Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c102af85df9c..096102d9b24e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1421,8 +1421,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident); - acpi_skip_timer_override = 1; + /* + * The ati_ixp4x0_rev() early PCI quirk should have set + * the acpi_skip_timer_override flag already: + */ + if (!acpi_skip_timer_override) { + WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); + pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + d->ident); + acpi_skip_timer_override = 1; + } return 0; } -- cgit v1.2.3 From f364eadab59b316ea0bd9f9bc01af0ad89065569 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 7 Oct 2008 14:04:27 -0700 Subject: x86: xsave: fix error condition in save_i387_xstate() Actually return failure on error. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index ed5274a5bb0f..448fde96963c 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -121,6 +121,8 @@ int save_i387_xstate(void __user *buf) err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *) (buf + sig_xstate_size - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return err; } return 1; -- cgit v1.2.3 From 04944b793e18ece23f63c0252646b310c1845940 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 7 Oct 2008 14:04:28 -0700 Subject: x86: xsave: set FP, SSE bits in the xsave header in the user sigcontext If a processor implementation discern that a processor state component is in its initialized state, it may modify the corresponding bit in the xsave header.xstate_bv as '0'. State in the memory layout setup by 'xsave' will be consistent with the bit values in the header. During signal handling, legacy applications may change the FP/SSE bits in the sigcontext memory layout without touching the FP/SSE header bits in the xsave header. So always set FP/SSE bits in the xsave header while saving the sigcontext state to the user space. During signal return, this will enable the kernel to capture any changes to the FP/SSE bits by the legacy applications which don't touch xsave headers. xsave aware apps can change the xstate_bv in the xsave header aswell as change any contents in the memory layout. xrestor as part of sigreturn will capture all the changes. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i387.c | 14 ++++++++++++++ arch/x86/kernel/xsave.c | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 45723f1fe198..1f20608d4ca8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) static int save_i387_xsave(void __user *buf) { + struct task_struct *tsk = current; struct _fpstate_ia32 __user *fx = buf; int err = 0; + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. + * This will enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware applications can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + if (save_i387_fxsave(fx) < 0) return -1; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 448fde96963c..2f98323716d9 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf) if (task_thread_info(tsk)->status & TS_XSAVE) { struct _fpstate __user *fx = buf; + struct _xstate __user *x = buf; + u64 xstate_bv; err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, sizeof(struct _fpx_sw_bytes)); @@ -121,6 +123,29 @@ int save_i387_xstate(void __user *buf) err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *) (buf + sig_xstate_size - FP_XSTATE_MAGIC2_SIZE)); + + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers and + * set the FP/SSE bits. + */ + err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xstate_bv |= XSTATE_FPSSE; + + err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + if (err) return err; } -- cgit v1.2.3 From 8d5922572038bae9f7b16fcb974eee806727b44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=A9meth=20M=C3=A1rton?= Date: Thu, 9 Oct 2008 14:59:17 +0200 Subject: [CPUFREQ] correct broken links and email addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the no longer working links and email address in the documentation and in source code. Signed-off-by: Márton Németh Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb91fbd..b8e05ee4f736 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c01cc36..3b5f06423e77 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -26,7 +26,7 @@ #include #define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" +#define MAINTAINER "cpufreq@vger.kernel.org" #define dprintk(msg...) \ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) -- cgit v1.2.3 From 18c6faa9624bf5d9d7da3906a8a1b909dba5899b Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 13:00:49 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/elanfreq.c Before: total: 15 errors, 10 warnings, 308 lines checked After: total: 0 errors, 4 warnings, 308 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/elafreq.o.* add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.after add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.before paolo@paolo-desktop:~/linux.trees.git$ size /tmp/elafreq.o.* text data bss dec hex filename 934 270 4 1208 4b8 /tmp/elafreq.o.after 934 270 4 1208 4b8 /tmp/elafreq.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index e4a4bf870e94..fe613c93b366 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) u8 clockspeed_reg; /* Clock Speed Register */ local_irq_disable(); - outb_p(0x80,REG_CSCIR); + outb_p(0x80, REG_CSCIR); clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) } /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0)==0xA0) + if ((clockspeed_reg & 0xE0) == 0xA0) return 33000; - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); + return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; } @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) * There is no return value. */ -static void elanfreq_set_cpu_state (unsigned int state) +static void elanfreq_set_cpu_state(unsigned int state) { struct cpufreq_freqs freqs; @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state) */ local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00,REG_CSCDR); + outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00, REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ local_irq_disable(); /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80,REG_CSCIR); - outb_p(elan_multiplier[state].val80h,REG_CSCDR); + outb_p(0x80, REG_CSCIR); + outb_p(elan_multiplier[state].val80h, REG_CSCDR); /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40,REG_CSCIR); - outb_p(elan_multiplier[state].val40h,REG_CSCDR); + outb_p(0x40, REG_CSCIR); + outb_p(elan_multiplier[state].val40h, REG_CSCDR); udelay(10000); local_irq_enable(); @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state) * for the hardware supported by the driver. */ -static int elanfreq_verify (struct cpufreq_policy *policy) +static int elanfreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target (struct cpufreq_policy *policy, +static int elanfreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) + (c->x86 != 4) || (c->x86_model != 10)) return -ENODEV; /* max freq */ @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); #endif -static struct freq_attr* elanfreq_attr[] = { +static struct freq_attr *elanfreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) { + (c->x86 != 4) || (c->x86_model != 10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; + return -ENODEV; } return cpufreq_register_driver(&elanfreq_driver); } @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) } -module_param (max_freq, int, 0444); +module_param(max_freq, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); -- cgit v1.2.3 From 8d2d2051e50c4ca0207a42b243369e22f7d90a5c Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 21:08:16 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/powernow-k6.c Before: total: 11 errors, 15 warnings, 255 lines checked After: total: 0 errors, 6 warnings, 254 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/powernow-k6.o.* 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.after 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 41 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index eb9b62b0830c..b5ced806a316 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -15,12 +15,11 @@ #include #include -#include -#include +#include +#include - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void) * * Tries to change the PowerNow! multiplier */ -static void powernow_k6_set_state (unsigned int best_i) +static void powernow_k6_set_state(unsigned int best_i) { - unsigned long outvalue=0, invalue=0; + unsigned long outvalue = 0, invalue = 0; unsigned long msrval; struct cpufreq_freqs freqs; @@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); invalue = invalue & 0xf; outvalue = outvalue | invalue; - outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + outl(outvalue , (POWERNOW_IOPORT + 0x8)); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * * sets a new CPUFreq policy */ -static int powernow_k6_target (struct cpufreq_policy *policy, +static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); @@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) { unsigned int i; - for (i=0; i<8; i++) { - if (i==max_multiplier) + for (i = 0; i < 8; i++) { + if (i == max_multiplier) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); @@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) return busfreq * powernow_k6_get_cpu_multiplier(); } -static struct freq_attr* powernow_k6_attr[] = { +static struct freq_attr *powernow_k6_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -227,7 +226,7 @@ static int __init powernow_k6_init(void) } if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); return -EINVAL; } @@ -243,13 +242,13 @@ static int __init powernow_k6_init(void) static void __exit powernow_k6_exit(void) { cpufreq_unregister_driver(&powernow_k6_driver); - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); } -MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE("GPL"); module_init(powernow_k6_init); module_exit(powernow_k6_exit); -- cgit v1.2.3 From 847aef6ffd85787b62395b64719f8f7c5975bf1b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 14 Jul 2008 11:59:44 +0900 Subject: [CPUFREQ] acpi-cpufreq: add error handling for cpufreq_register_driver() error add error handling for cpufreq_register_driver() error Signed-off-by: Akinobu Mita Cc: cpufreq@lists.linux.org.uk Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b835839..1def5b06fa4a 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -785,7 +785,11 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; - return cpufreq_register_driver(&acpi_cpufreq_driver); + ret = cpufreq_register_driver(&acpi_cpufreq_driver); + if (ret) + free_percpu(acpi_perf_data); + + return ret; } static void __exit acpi_cpufreq_exit(void) @@ -795,8 +799,6 @@ static void __exit acpi_cpufreq_exit(void) cpufreq_unregister_driver(&acpi_cpufreq_driver); free_percpu(acpi_perf_data); - - return; } module_param(acpi_pstate_strict, uint, 0644); -- cgit v1.2.3 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1def5b06fa4a..c24c4a487b7c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and * no meaning should be associated with absolute values of these MSRs. */ -static unsigned int get_measured_perf(unsigned int cpu) +static unsigned int get_measured_perf(struct cpufreq_policy *policy, + unsigned int cpu) { union { struct { @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) #endif - retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed_ptr(current, &saved_mask); -- cgit v1.2.3 From 43603c8df97f246e8be7b9cc92a8f968a85108bd Mon Sep 17 00:00:00 2001 From: Hans Schou Date: Thu, 9 Oct 2008 20:47:24 +0200 Subject: x86, debug: print more information about unknown CPUs Write the name of the unknown vendor_id to output instead of just "unknown". Tag changed to 'vendor_id' as used in /proc/cpuinfo Signed-off-by: Hans Schou Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cef3519b3bbb..84c4040efa80 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -406,7 +406,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } -- cgit v1.2.3 From b2bc27314664c4d1a2f02e6f4cd0c32e4681d61e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:36 -0700 Subject: x86, cpa: rename PTE attribute macros for kernel direct mapping in early boot Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 34 +++++++++++++++------------------- arch/x86/kernel/head_64.S | 4 ++-- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a7010c3a377a..e835b4eea70b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 * * Note that the stack is not yet set up! */ -#define PTE_ATTR 0x007 /* PRESENT+RW+USER */ -#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ -#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ - default_entry: #ifdef CONFIG_X86_PAE @@ -196,9 +192,9 @@ default_entry: movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ movl %ecx,(%edx) /* Store PMD entry */ /* Upper half already zero */ addl $8,%edx @@ -215,7 +211,7 @@ default_entry: * End condition: we must map up to and including INIT_MAP_BEYOND_END * bytes beyond the end of our own page tables. */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b 1: @@ -224,7 +220,7 @@ default_entry: movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) #else /* Not PAE */ @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ addl $4,%edx @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); * bytes beyond the end of our own page tables; the +0x007 is * the attribute bits */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,pa(init_pg_tables_end) @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_dir+0xffc) #endif jmp 3f @@ -634,19 +630,19 @@ ENTRY(empty_zero_page) /* Page-aligned for the benefit of paravirt? */ .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) - .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 # elif KPMDS == 2 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 # elif KPMDS == 1 .long 0,0 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 # else # error "Kernel PMDs should be 1, 2 or 3" # endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index db3280afe886..26cfdc1d7c7f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -110,7 +110,7 @@ startup_64: movq %rdi, %rax shrq $PMD_SHIFT, %rax andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx + leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx leaq level2_spare_pgt(%rip), %rbx movq %rdx, 0(%rbx, %rax, 8) ident_complete: @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) /* Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) /* -- cgit v1.2.3 From e1e23bb0513520035ec934fa3483507cb6648b7c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 7 Oct 2008 14:15:11 -0700 Subject: x86: avoid dereferencing beyond stack + THREAD_SIZE It's possible for get_wchan() to dereference past task->stack + THREAD_SIZE while iterating through instruction pointers if fp equals the upper boundary, causing a kernel panic. Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 2a8ccb9238b4..b6b508ea7110 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -754,12 +754,12 @@ unsigned long get_wchan(struct task_struct *p) if (!p || p == current || p->state == TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); do { if (fp < (unsigned long)stack || - fp > (unsigned long)stack+THREAD_SIZE) + fp >= (unsigned long)stack+THREAD_SIZE) return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) -- cgit v1.2.3 From cb58ffc3889f0545628f138f849e759a331b8ddc Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Sun, 12 Oct 2008 10:51:03 +0200 Subject: x86: fix early panic on amd64 due to typo in supported CPU section Do not crash when enumerating supported CPU architectures SECURITY_INIT somehow ended up in x86_cpu_dev.init section. That caused printk in code which prints supported architectures to hit #GP due to non-canonical address being used. Signed-off-by: Petr Vandrovec Cc: thomas.petazzoni@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 201e81a91a95..46e05447405b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -172,8 +172,8 @@ SECTIONS .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { *(.x86_cpu_dev.init) } - SECURITY_INIT __x86_cpu_dev_end = .; + SECURITY_INIT . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { -- cgit v1.2.3 From 325af5fb1418c79953db0954556de048e061d8b6 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 8 Aug 2008 15:58:39 -0700 Subject: x86: ioperm user_regset This adds a user_regset type for the x86 io permissions bitmap. This makes it appear in core dumps (when ioperm has been used). It will also make it visible to debuggers in the future. Signed-off-by: Roland McGrath Signed-off-by: H. Peter Anvin [conflict resolutions: Signed-off-by: Ingo Molnar ] --- arch/x86/kernel/ptrace.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e375b658efc3..4e1ef66c2ea4 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -40,7 +40,9 @@ enum x86_regset { REGSET_GENERAL, REGSET_FP, REGSET_XFP, + REGSET_IOPERM64 = REGSET_XFP, REGSET_TLS, + REGSET_IOPERM32, }; /* @@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } +/* + * These access the current or another (stopped) task's io permission + * bitmap for debugging or core dump. + */ +static int ioperm_active(struct task_struct *target, + const struct user_regset *regset) +{ + return target->thread.io_bitmap_max / regset->size; +} + +static int ioperm_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (!target->thread.io_bitmap_ptr) + return -ENXIO; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.io_bitmap_ptr, + 0, IO_BITMAP_BYTES); +} + #ifdef CONFIG_X86_PTRACE_BTS /* * The configuration for a particular BTS hardware implementation. @@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = { .size = sizeof(long), .align = sizeof(long), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_IOPERM64] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_LONGS, + .size = sizeof(long), .align = sizeof(long), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_64_view = { @@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = { .active = regset_tls_active, .get = regset_tls_get, .set = regset_tls_set }, + [REGSET_IOPERM32] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_BYTES / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_32_view = { -- cgit v1.2.3 From 9f482807a6bd7e2aa1ed0d8cfc48463ec4ca3568 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 18 Aug 2008 12:59:32 +0200 Subject: x86, fpu: check __clear_user() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/xsave.c: In function ‘save_i387_xstate’: arch/x86/kernel/xsave.c:98: warning: ignoring return value of ‘__clear_user’, declared with attribute warn_unused_result check the return value and act on it. We should not be ignoring faults at this point. Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2f98323716d9..9abac8a9d823 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf) * Start with clearing the user buffer. This will present a * clean context for the bytes not touched by the fxsave/xsave. */ - __clear_user(buf, sig_xstate_size); + err = __clear_user(buf, sig_xstate_size); + if (err) + return err; if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); -- cgit v1.2.3 From 45e96f26f257bd873017c6244a6cafd27f6f5439 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Aug 2008 10:37:14 +0200 Subject: warnings: fix arch/x86/kernel/early_printk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/early_printk.c:993: warning: ‘enable_debug_console’ defined but not used Eliminate dead code. Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 02fc5b40c14b..34ad997d3834 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -989,22 +989,4 @@ static int __init setup_early_printk(char *buf) return 0; } -static void __init enable_debug_console(char *buf) -{ -#ifdef DBGP_DEBUG - struct console *old_early_console = NULL; - - if (early_console_initialized && early_console) { - old_early_console = early_console; - unregister_console(early_console); - early_console_initialized = 0; - } - - setup_early_printk(buf); - - if (early_console == old_early_console && old_early_console) - register_console(old_early_console); -#endif -} - early_param("earlyprintk", setup_early_printk); -- cgit v1.2.3 From d562353a4533c4671af683499191707c9a77c406 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 12 Oct 2008 15:22:22 +0200 Subject: warnings: fix arch/x86/kernel/io_apic_64.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/io_apic_64.c: In function ‘print_local_APIC’: arch/x86/kernel/io_apic_64.c:1284: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/x86/kernel/io_apic_64.c:1285: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ We want to print the two halves of 'icr' at 32 bit width. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index a1bec2969c6a..02063ae042f7 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1281,8 +1281,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From 8a66712ba0969aea5580b9e312badfc2490fc614 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 12 Oct 2008 15:24:53 +0200 Subject: x86, amd-iommu: propagate PCI device enabling error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit propagate an error in enabling the PCI device. Also eliminates this warning: arch/x86/kernel/amd_iommu_init.c: In function ‘init_iommu_one’: arch/x86/kernel/amd_iommu_init.c:726: warning: ignoring return value of ‘pci_enable_device’, declared with attribute warn_unused_result Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 148fcfe22f17..4cd8083c58be 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -723,9 +723,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); - pci_enable_device(iommu->dev); - - return 0; + return pci_enable_device(iommu->dev); } /* -- cgit v1.2.3