From 1d93e52eb48df986a3c4d5ad8a520bf1f6837367 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2009 08:47:19 -0700 Subject: dmaengine: update kerneldoc Some of the kerneldoc comments in the dmaengine header describe already removed structure members. Remove them. Also add a short description for dma_device->device_is_tx_complete. Signed-off-by: Johannes Weiner Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3e68469c1885..087e79acf8c7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -97,7 +97,6 @@ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; /** * struct dma_chan_percpu - the per-CPU part of struct dma_chan - * @refcount: local_t used for open-coded "bigref" counting * @memcpy_count: transaction counter * @bytes_transferred: byte counter */ @@ -114,9 +113,6 @@ struct dma_chan_percpu { * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs * @dev: class device for sysfs - * @refcount: kref, used in "bigref" slow-mode - * @slow_ref: indicates that the DMA channel is free - * @rcu: the DMA channel's RCU head * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel @@ -211,8 +207,6 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability - * @refcount: reference count - * @done: IO completion struct * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -225,6 +219,7 @@ struct dma_async_tx_descriptor { * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_terminate_all: terminate all pending operations + * @device_is_tx_complete: poll for transaction completion * @device_issue_pending: push pending transactions to hardware */ struct dma_device { -- cgit v1.2.3 From b36128c830a8f5bd7d4981f5b0b69950f5928ee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: change percpu_ptr to per_cpu_ptr Impact: cleanup There are two allocated per-cpu accessor macros with almost identical spelling. The original and far more popular is per_cpu_ptr (44 files), so change over the other 4 files. tj: kill percpu_ptr() and update UP too Signed-off-by: Rusty Russell Cc: mingo@redhat.com Cc: lenb@kernel.org Cc: cpufreq@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3577ffd90d45..c80cfe1260ec 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -81,23 +81,13 @@ struct percpu_data { }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define percpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { @@ -122,6 +112,15 @@ static inline void percpu_free(void *__pdata) cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) #define free_percpu(ptr) percpu_free((ptr)) -#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) +/* + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should + * probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From 313e458f81ec3852106c5a83830fe0d4f405a71a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: add align argument to __alloc_percpu. This prepares for a real __alloc_percpu, by adding an alignment argument. Only one place uses __alloc_percpu directly, and that's for a string. tj: af_inet also uses __alloc_percpu(), update it. Signed-off-by: Rusty Russell Cc: Christoph Lameter Cc: Jens Axboe --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index c80cfe1260ec..1fdaee93c04d 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -108,9 +108,10 @@ static inline void percpu_free(void *__pdata) /* (legacy) interface for use without CPU hotplug handling */ -#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ +#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ cpu_possible_map) -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #define free_percpu(ptr) percpu_free((ptr)) /* * Use this to get to a cpu's version of the per-cpu object dynamically -- cgit v1.2.3 From f2a8205c4ef1af917d175c36a4097ae5587791c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: kill percpu_alloc() and friends Impact: kill unused functions percpu_alloc() and its friends never saw much action. It was supposed to replace the cpu-mask unaware __alloc_percpu() but it never happened and in fact __percpu_alloc_mask() itself never really grew proper up/down handling interface either (no exported interface for populate/depopulate). percpu allocation is about to go through major reimplementation and there's no reason to carry this unused interface around. Replace it with __alloc_percpu() and free_percpu(). Signed-off-by: Tejun Heo --- include/linux/percpu.h | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1fdaee93c04d..d99e24ae1811 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,46 +82,43 @@ struct percpu_data { #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); -extern void percpu_free(void *__pdata); +/* + * Use this to get to a cpu's version of the per-cpu object + * dynamically allocated. Non-atomic access to the current CPU's + * version should probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) + +extern void *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void *__pdata); #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +static inline void *__alloc_percpu(size_t size, size_t align) { + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. + */ + WARN_ON_ONCE(align > __alignof__(unsigned long long)); return kzalloc(size, gfp); } -static inline void percpu_free(void *__pdata) +static inline void free_percpu(void *p) { - kfree(__pdata); + kfree(p); } #endif /* CONFIG_SMP */ -#define percpu_alloc_mask(size, gfp, mask) \ - __percpu_alloc_mask((size), (gfp), &(mask)) - -#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) - -/* (legacy) interface for use without CPU hotplug handling */ - -#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ - cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -#define free_percpu(ptr) percpu_free((ptr)) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From f0aa6617903648077dffe5cfcf7c4458f4610fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: implement vm_area_register_early() Impact: allow multiple early vm areas There are places where kernel VM area needs to be allocated before vmalloc is initialized. This is done by allocating static vm_struct, initializing several fields and linking it to vmlist and later vmalloc initialization picking up these from vmlist. This is currently done manually and if there's more than one such areas, there's no defined way to arbitrate who gets which address. This patch implements vm_area_register_early(), which takes vm_area struct with flags and size initialized, assigns address to it and puts it on the vmlist. This way, multiple early vm areas can determine which addresses they should use. The only current user - alpha mm init - is converted to use it. Signed-off-by: Tejun Heo --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a986..bbc051392298 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,5 +106,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_register_early(struct vm_struct *vm); #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 8fc48985006da4ceba24508db64ec77fc0dfe3bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: add un/map_kernel_range_noflush() Impact: two more public map/unmap functions Implement map_kernel_range_noflush() and unmap_kernel_range_noflush(). These functions respectively map and unmap address range in kernel VM area but doesn't do any vcache or tlb flushing. These will be used by new percpu allocator. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bbc051392298..599ba7984310 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -91,6 +91,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); +extern int map_kernel_range_noflush(unsigned long start, unsigned long size, + pgprot_t prot, struct page **pages); +extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area. */ -- cgit v1.2.3 From fbf59bc9d74d1fb30b8e0630743aff2806eafcea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: implement new dynamic percpu allocator Impact: new scalable dynamic percpu allocator which allows dynamic percpu areas to be accessed the same way as static ones Implement scalable dynamic percpu allocator which can be used for both static and dynamic percpu areas. This will allow static and dynamic areas to share faster direct access methods. This feature is optional and enabled only when CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is defined by arch. Please read comment on top of mm/percpu.c for details. Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/percpu.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d99e24ae1811..18080995ff3e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -76,23 +76,37 @@ #ifdef CONFIG_SMP -struct percpu_data { - void *ptrs[1]; -}; +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +extern void *pcpu_base_addr; +typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); + +extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size); /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) + +#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + +struct percpu_data { + void *ptrs[1]; +}; + +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) + #define per_cpu_ptr(ptr, cpu) \ ({ \ struct percpu_data *__p = __percpu_disguise(ptr); \ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); -- cgit v1.2.3 From c132937556f56ee4b831ef4b23f1846e05fde102 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:20 +0900 Subject: bootmem: clean up arch-specific bootmem wrapping Impact: cleaner and consistent bootmem wrapping By setting CONFIG_HAVE_ARCH_BOOTMEM_NODE, archs can define arch-specific wrappers for bootmem allocation. However, this is done a bit strangely in that only the high level convenience macros can be changed while lower level, but still exported, interface functions can't be wrapped. This not only is messy but also leads to strange situation where alloc_bootmem() does what the arch wants it to do but the equivalent __alloc_bootmem() call doesn't although they should be able to be used interchangeably. This patch updates bootmem such that archs can override / wrap the backend function - alloc_bootmem_core() instead of the highlevel interface functions to allow simpler and consistent wrapping. Also, HAVE_ARCH_BOOTMEM_NODE is renamed to HAVE_ARCH_BOOTMEM. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 95837bfb5256..3a87f93081ed 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -69,10 +69,9 @@ extern int reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -#endif - +extern int reserve_bootmem(unsigned long addr, + unsigned long size, + int flags); extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); @@ -94,7 +93,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ @@ -113,7 +112,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); -- cgit v1.2.3 From 2d0aae41695257603fc281b519677131ab5a752b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: bootmem: reorder interface functions and add a missing one Impact: cleanup and addition of missing interface wrapper The interface functions in bootmem.h was ordered in not so orderly manner. Reorder them such that * functions allocating the same area group together - ie. alloc_bootmem group and alloc_bootmem_low group. * functions w/o node parameter come before the ones w/ node parameter. * nopanic variants are immediately below their panicky counterparts. While at it, add alloc_bootmem_pages_node_nopanic() which was missing. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3a87f93081ed..455d83219fae 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -65,22 +65,20 @@ extern void free_bootmem(unsigned long addr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -extern void *__alloc_bootmem_nopanic(unsigned long size, +extern int reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size, + int flags); + +extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem(unsigned long size, +extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -89,6 +87,9 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -98,18 +99,21 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_nopanic(x) \ __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + +#define alloc_bootmem_low(x) \ + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -- cgit v1.2.3 From c0c0a29379b5848aec2e8f1c58d853d3cb7118b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: vmalloc: add @align to vm_area_register_early() Impact: allow larger alignment for early vmalloc area allocation Some early vmalloc users might want larger alignment, for example, for custom large page mapping. Add @align to vm_area_register_early(). While at it, drop docbook comment on non-existent @size. Signed-off-by: Tejun Heo Cc: Nick Piggin Cc: Ivan Kokshaysky --- include/linux/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 599ba7984310..2f6994fdf0e0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -109,6 +109,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm); +extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 8d408b4be37bc49c9086531f2ebe411cf5731746 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: percpu: give more latitude to arch specific first chunk initialization Impact: more latitude for first percpu chunk allocation The first percpu chunk serves the kernel static percpu area and may or may not contain extra room for further dynamic allocation. Initialization of the first chunk needs to be done before normal memory allocation service is up, so it has its own init path - pcpu_setup_static(). It seems archs need more latitude while initializing the first chunk for example to take advantage of large page mapping. This patch makes the following changes to allow this. * Define PERCPU_DYNAMIC_RESERVE to give arch hint about how much space to reserve in the first chunk for further dynamic allocation. * Rename pcpu_setup_static() to pcpu_setup_first_chunk(). * Make pcpu_setup_first_chunk() much more flexible by fetching page pointer by callback and adding optional @unit_size, @free_size and @base_addr arguments which allow archs to selectively part of chunk initialization to their likings. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 18080995ff3e..910beb0abea2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,12 +78,47 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +/* minimum unit size, also is the maximum supported allocation size */ +#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) + +/* + * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy + * back on the first chunk if arch is manually allocating and mapping + * it for faster access (as a part of large page mapping for example). + * Note that dynamic percpu allocator covers both static and dynamic + * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * + * On typical configuration with modules, the following values leave + * about 8k of free space on the first chunk after boot on both x86_32 + * and 64 when module support is enabled. When module support is + * disabled, it's much tighter. + */ +#ifndef PERCPU_DYNAMIC_RESERVE +# if BITS_PER_LONG > 32 +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# endif +# else +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# endif +# endif +#endif /* PERCPU_DYNAMIC_RESERVE */ + extern void *pcpu_base_addr; +typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); -extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, - struct page **pages, size_t cpu_size); +extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's -- cgit v1.2.3 From d2b0261506602bd969164879206027b30358ffdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:36:45 +0100 Subject: alloc_percpu: fix UP build Impact: build fix the !SMP branch had a 'gfp' leftover: include/linux/percpu.h: In function '__alloc_percpu': include/linux/percpu.h:160: error: 'gfp' undeclared (first use in this function) include/linux/percpu.h:160: error: (Each undeclared identifier is reported only once include/linux/percpu.h:160: error: for each function it appears in.) Use GFP_KERNEL like the SMP version does. Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 910beb0abea2..d8e5a9abbce0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -157,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > __alignof__(unsigned long long)); - return kzalloc(size, gfp); + return kzalloc(size, GFP_KERNEL); } static inline void free_percpu(void *p) -- cgit v1.2.3 From e317603694bfd17b28a40de9d65e1a4ec12f816e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Feb 2009 10:54:17 +0900 Subject: percpu: fix too low alignment restriction on UP UP __alloc_percpu() triggered WARN_ON_ONCE() if the requested alignment is larger than that of unsigned long long, which is too small for all the cacheline aligned allocations. Bump it up to SMP_CACHE_BYTES which kmalloc allocations generally guarantee. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d8e5a9abbce0..545b068bcb70 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -156,7 +156,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * on it. Larger alignment should only be used for module * percpu sections on SMP for which this path isn't used. */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); + WARN_ON_ONCE(align > SMP_CACHE_BYTES); return kzalloc(size, GFP_KERNEL); } -- cgit v1.2.3 From a682604838763981613e42015cd0e39f2989d6bb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 25 Feb 2009 18:03:42 -0800 Subject: rcu: Teach RCU that idle task is not quiscent state at boot This patch fixes a bug located by Vegard Nossum with the aid of kmemcheck, updated based on review comments from Nick Piggin, Ingo Molnar, and Andrew Morton. And cleans up the variable-name and function-name language. ;-) The boot CPU runs in the context of its idle thread during boot-up. During this time, idle_cpu(0) will always return nonzero, which will fool Classic and Hierarchical RCU into deciding that a large chunk of the boot-up sequence is a big long quiescent state. This in turn causes RCU to prematurely end grace periods during this time. This patch changes the rcutree.c and rcuclassic.c rcu_check_callbacks() function to ignore the idle task as a quiescent state until the system has started up the scheduler in rest_init(), introducing a new non-API function rcu_idle_now_means_idle() to inform RCU of this transition. RCU maintains an internal rcu_idle_cpu_truthful variable to track this state, which is then used by rcu_check_callback() to determine if it should believe idle_cpu(). Because this patch has the effect of disallowing RCU grace periods during long stretches of the boot-up sequence, this patch also introduces Josh Triplett's UP-only optimization that makes synchronize_rcu() be a no-op if num_online_cpus() returns 1. This allows boot-time code that calls synchronize_rcu() to proceed normally. Note, however, that RCU callbacks registered by call_rcu() will likely queue up until later in the boot sequence. Although rcuclassic and rcutree can also use this same optimization after boot completes, rcupreempt must restrict its use of this optimization to the portion of the boot sequence before the scheduler starts up, given that an rcupreempt RCU read-side critical section may be preeempted. In addition, this patch takes Nick Piggin's suggestion to make the system_state global variable be __read_mostly. Changes since v4: o Changes the name of the introduced function and variable to be less emotional. ;-) Changes since v3: o WARN_ON(nr_context_switches() > 0) to verify that RCU switches out of boot-time mode before the first context switch, as suggested by Nick Piggin. Changes since v2: o Created rcu_blocking_is_gp() internal-to-RCU API that determines whether a call to synchronize_rcu() is itself a grace period. o The definition of rcu_blocking_is_gp() for rcuclassic and rcutree checks to see if but a single CPU is online. o The definition of rcu_blocking_is_gp() for rcupreempt checks to see both if but a single CPU is online and if the system is still in early boot. This allows rcupreempt to again work correctly if running on a single CPU after booting is complete. o Added check to rcupreempt's synchronize_sched() for there being but one online CPU. Tested all three variants both SMP and !SMP, booted fine, passed a short rcutorture test on both x86 and Power. Located-by: Vegard Nossum Tested-by: Vegard Nossum Tested-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 6 ++++++ include/linux/rcupdate.h | 4 ++++ include/linux/rcupreempt.h | 15 +++++++++++++++ include/linux/rcutree.h | 6 ++++++ 4 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index f3f697df1d71..80044a4f3ab9 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -181,4 +181,10 @@ extern long rcu_batches_completed_bh(void); #define rcu_enter_nohz() do { } while (0) #define rcu_exit_nohz() do { } while (0) +/* A context switch is a grace period for rcuclassic. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 921340a7b71c..528343e6da51 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,6 +52,9 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; +/* Internal to kernel, but needed by rcupreempt.h. */ +extern int rcu_scheduler_active; + #if defined(CONFIG_CLASSIC_RCU) #include #elif defined(CONFIG_TREE_RCU) @@ -265,6 +268,7 @@ extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); +extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 3e05c09b54a2..74304b4538d8 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -142,4 +142,19 @@ static inline void rcu_exit_nohz(void) #define rcu_exit_nohz() do { } while (0) #endif /* CONFIG_NO_HZ */ +/* + * A context switch is a grace period for rcupreempt synchronize_rcu() + * only during early boot, before the scheduler has been initialized. + * So, how the heck do we get a context switch? Well, if the caller + * invokes synchronize_rcu(), they are willing to accept a context + * switch, so we simply pretend that one happened. + * + * After boot, there might be a blocked or preempted task in an RCU + * read-side critical section, so we cannot then take the fastpath. + */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1 && !rcu_scheduler_active; +} + #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d4368b7975c3..a722fb67bb2d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -326,4 +326,10 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ +/* A context switch is a grace period for rcutree. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUTREE_H */ -- cgit v1.2.3 From 54e991242850edc8c53f71fa5aa3ba7a93ce38f5 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Fri, 27 Feb 2009 15:13:54 +0530 Subject: sched: don't allow setuid to succeed if the user does not have rt bandwidth Impact: fix hung task with certain (non-default) rt-limit settings Corey Hickey reported that on using setuid to change the uid of a rt process, the process would be unkillable and not be running. This is because there was no rt runtime for that user group. Add in a check to see if a user can attach an rt task to its task group. On failure, return EINVAL, which is also returned in CONFIG_CGROUP_SCHED. Reported-by: Corey Hickey Signed-off-by: Dhaval Giani Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8981e52c714f..8c216e057c94 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,9 +2291,13 @@ extern long sched_group_rt_runtime(struct task_group *tg); extern int sched_group_set_rt_period(struct task_group *tg, long rt_period_us); extern long sched_group_rt_period(struct task_group *tg); +extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif #endif +extern int task_can_switch_user(struct user_struct *up, + struct task_struct *tsk); + #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { -- cgit v1.2.3 From 9d40bbda599def1e1d155d7f7dca14fe8744bd2b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Mar 2009 23:46:25 -0800 Subject: vlan: Fix vlan-in-vlan crashes. As analyzed by Patrick McHardy, vlan needs to reset it's netdev_ops pointer in it's ->init() function but this leaves the compat method pointers stale. Add a netdev_resync_ops() and call it from the vlan code. Any other driver which changes ->netdev_ops after register_netdevice() will need to call this new function after doing so too. With help from Patrick McHardy. Tested-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec54785d34f9..659366734f3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1079,6 +1079,7 @@ extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); extern int init_dummy_netdev(struct net_device *dev); +extern void netdev_resync_ops(struct net_device *dev); extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); -- cgit v1.2.3 From e7d3ef13d52a126438f687a1a32da65ff926ed57 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 4 Mar 2009 11:59:46 -0800 Subject: libata: change drive ready wait after hard reset to 5s This fixes problems during resume with drives that take longer than 1s to be ready. The ATA-6 spec appears to allow 5 seconds for a drive to be ready. On one affected system, this patch changes "PM: resume devices took..." message from 17 seconds to 4 seconds, and gets rid of a lot of ugly timeout/error messages. Without this patch, the libata code moves on after 1s, tries to send a soft reset (which the drive doesn't see because it isn't ready) which also times out, then an IDENTIFY command is sent to the drive which times out, and finally the error handler will try to send another hard reset which will finally get things working. Signed-off-by: Stuart Hayes Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5d87bc09a1f5..dd818c7decd7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -275,7 +275,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 1000, + ATA_TMOUT_PMP_SRST_WAIT = 5000, /* ATA bus states */ BUS_UNKNOWN = 0, -- cgit v1.2.3 From 5825627c9463581fd9e70f8285685889ae5bb9bb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 27 Feb 2009 17:35:43 +0900 Subject: libata: fix dma_unmap_sg misuse libata passes the returned value of dma_map_sg() to dma_unmap_sg(),which is the misuse of dma_unmap_sg(). DMA-mapping.txt says: To unmap a scatterlist, just call: pci_unmap_sg(pdev, sglist, nents, direction); Again, make sure DMA activity has already finished. PLEASE NOTE: The 'nents' argument to the pci_unmap_sg call must be the _same_ one you passed into the pci_map_sg call, it should _NOT_ be the 'count' value _returned_ from the pci_map_sg call. Signed-off-by: FUJITA Tomonori Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index dd818c7decd7..fbf064e13ad5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -530,6 +530,7 @@ struct ata_queued_cmd { unsigned long flags; /* ATA_QCFLAG_xxx */ unsigned int tag; unsigned int n_elem; + unsigned int orig_n_elem; int dma_dir; -- cgit v1.2.3 From 84bda12af31f930e4200c5244aa111de2485d7b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 2 Mar 2009 18:53:26 +0900 Subject: libata: align ap->sector_buf ap->sector_buf is used as DMA target and should at least be aligned on cacheline. This caused problems on some embedded machines. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index fbf064e13ad5..dc18b87ed722 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -751,7 +751,8 @@ struct ata_port { acpi_handle acpi_handle; struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif - u8 sector_buf[ATA_SECT_SIZE]; /* owned by EH */ + /* owned by EH */ + u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* The following initializer overrides a method to NULL whether one of -- cgit v1.2.3 From 849d7130001ab740a5a4778a561049841fdd77c9 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 5 Mar 2009 16:10:57 +0100 Subject: ide: allow to wrap interrupt handler Signed-off-by: Stanislaw Gruszka Cc: Andrew Victor [bart: minor checkpatch.pl / CodingStyle fixups] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fe235b65207e..e0cedfe9fad4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -866,6 +866,7 @@ struct ide_host { unsigned int n_ports; struct device *dev[2]; unsigned int (*init_chipset)(struct pci_dev *); + irq_handler_t irq_handler; unsigned long host_flags; void *host_priv; ide_hwif_t *cur_port; /* for hosts requiring serialization */ -- cgit v1.2.3 From ebcad5aaea26da3cb2ca90b7f31a67a027eb60db Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 5 Mar 2009 16:10:59 +0100 Subject: remove stale comment from HDIO_GET_IDENTITY returns 256 words currently. Noticed by Norman Diamond. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/hdreg.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h index c37e9241fae7..ed21bd3dbd25 100644 --- a/include/linux/hdreg.h +++ b/include/linux/hdreg.h @@ -511,7 +511,6 @@ struct hd_driveid { unsigned short words69_70[2]; /* reserved words 69-70 * future command overlap and queuing */ - /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */ unsigned short words71_74[4]; /* reserved words 71-74 * for IDENTIFY PACKET DEVICE command */ -- cgit v1.2.3 From d42ad15b759d05a87f22b484af63987eff38ea88 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 5 Mar 2009 17:20:55 +0100 Subject: ata: add CFA specific identify data words Declare CFA specific identify data words 162 and 163 for future use. Signed-off-by: Sergei Shtylyov Signed-off-by: Jeff Garzik [bart: update patch summary/description] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 08a86d5cdf1b..9a061accd8b8 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -89,6 +89,8 @@ enum { ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, + ATA_ID_CFA_KEY_MGMT = 162, + ATA_ID_CFA_MODES = 163, ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), -- cgit v1.2.3 From 6a242909b01120f6f3d571c0b75e20ec61f0d8d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:58 +0900 Subject: percpu: clean up percpu constants Impact: cleaup Make the following cleanups. * There isn't much arch-specific about PERCPU_MODULE_RESERVE. Always define it whether arch overrides PERCPU_ENOUGH_ROOM or not. * blackfin overrides PERCPU_ENOUGH_ROOM to align static area size. Do it by default. * percpu allocation sizes doesn't have much to do with the page size. Don't use PAGE_SHIFT in their definition. Signed-off-by: Tejun Heo Cc: Bryan Wu --- include/linux/percpu.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 545b068bcb70..2d34b038fe70 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -5,6 +5,7 @@ #include /* For kmalloc() */ #include #include +#include #include @@ -52,17 +53,18 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ -#ifndef PERCPU_ENOUGH_ROOM +/* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 +#define PERCPU_MODULE_RESERVE (8 << 10) #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif +#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) -#endif /* PERCPU_ENOUGH_ROOM */ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) +#endif /* * Must be an lvalue. Since @var must be a simple identifier, @@ -79,7 +81,7 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -96,15 +98,15 @@ #ifndef PERCPU_DYNAMIC_RESERVE # if BITS_PER_LONG > 32 # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (24 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # endif # else # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (8 << 10) # endif # endif #endif /* PERCPU_DYNAMIC_RESERVE */ -- cgit v1.2.3 From 2441d15c97d498b18f03ae9fba262ffeae42a08b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: cosmetic renames in pcpu_setup_first_chunk() Impact: cosmetic, preparation for future changes Make the following renames in pcpur_setup_first_chunk() in preparation for future changes. * s/free_size/dyn_size/ * s/static_vm/first_vm/ * s/static_chunk/schunk/ Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2d34b038fe70..a0b4ea2a3354 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -118,7 +118,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit v1.2.3 From cafe8816b217b98dc3f268d3b77445da498beb4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: use negative for auto for pcpu_setup_first_chunk() arguments Impact: argument semantic cleanup In pcpu_setup_first_chunk(), zero @unit_size and @dyn_size meant auto-sizing. It's okay for @unit_size as 0 doesn't make sense but 0 dynamic reserve size is valid. Alos, if arch @dyn_size is calculated from other parameters, it might end up passing in 0 @dyn_size and malfunction when the size is automatically adjusted. This patch makes both @unit_size and @dyn_size ssize_t and use -1 for auto sizing. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a0b4ea2a3354..a96fc53bbd62 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,8 +117,9 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t dyn_size, void *base_addr, + size_t static_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit v1.2.3 From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module perpcu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes. If arch doesn't setup reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a96fc53bbd62..8ff15153ae20 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { -- cgit v1.2.3 From 6b19b0c2400437a3c10059ede0e59b517092e1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: x86, percpu: setup reserved percpu area for x86_64 Impact: fix relocation overflow during module load x86_64 uses 32bit relocations for symbol access and static percpu symbols whether in core or modules must be inside 2GB of the percpu segement base which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk which is always inside the relocatable range. This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it. This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE such that it only indicates the size of the area to reserve for dynamic allocation as static and dynamic areas can be separate. New PERCPU_DYNAMIC_RESERVED is increased by 4k for both 32 and 64bits as the reserved area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after minimal boot sans module area) makes sense for common case performance. x86_32 can address anywhere from anywhere and doesn't need reserving. Mike Galbraith first reported the problem first and bisected it to the embedding percpu allocator commit. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Reported-by: Jaswinder Singh Rajput --- include/linux/percpu.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ff15153ae20..54a968b4b924 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -85,31 +85,20 @@ /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * back on the first chunk for dynamic percpu allocation if arch is + * manually allocating and mapping it for faster access (as a part of + * large page mapping for example). * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter. + * The following values give between one and two pages of free space + * after typical minimal boot (2-way SMP, single disk and NIC) with + * both defconfig and a distro config on x86_64 and 32. More + * intelligent way to determine this would be nice. */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (24 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (8 << 10) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ +#if BITS_PER_LONG > 32 +#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#else +#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#endif extern void *pcpu_base_addr; -- cgit v1.2.3 From ab96ddec7213004b632d24dc2cdcd2df5f16f50b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 7 Mar 2009 13:39:22 -0800 Subject: Input: serio - fix protocol number for TouchIT213 Protocol 0x37 has been reserved for iNexio devices and Sahara was supposed to get 0x38. Reported-by: Claudio Nieder Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index 1bcb357a01a1..e0417e4d3f15 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -212,7 +212,7 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_FUJITSU 0x35 #define SERIO_ZHENHUA 0x36 #define SERIO_INEXIO 0x37 -#define SERIO_TOUCHIT213 0x37 +#define SERIO_TOUCHIT213 0x38 #define SERIO_W8001 0x39 #endif -- cgit v1.2.3 From 129f8ae9b1b5be94517da76009ea956e89104ce8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 9 Mar 2009 15:07:33 -0400 Subject: Revert "[CPUFREQ] Disable sysfs ui for p4-clockmod." This reverts commit e088e4c9cdb618675874becb91b2fd581ee707e6. Removing the sysfs interface for p4-clockmod was flagged as a regression in bug 12826. Course of action: - Find out the remaining causes of overheating, and fix them if possible. ACPI should be doing the right thing automatically. If it isn't, we need to fix that. - mark p4-clockmod ui as deprecated - try again with the removal in six months. It's not really feasible to printk about the deprecation, because it needs to happen at all the sysfs entry points, which means adding a lot of strcmp("p4-clockmod".. calls to the core, which.. bleuch. Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 384b38d3e8e2..161042746afc 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -234,7 +234,6 @@ struct cpufreq_driver { int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; - bool hide_interface; }; /* flags */ -- cgit v1.2.3 From 6074d5b0a319fe8400ff079a3c289406ca024321 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: more flexibility for @dyn_size of pcpu_setup_first_chunk() Impact: cleanup, more flexibility for first chunk init Non-negative @dyn_size used to be allowed iff @unit_size wasn't auto. This restriction stemmed from implementation detail and made things a bit less intuitive. This patch allows @dyn_size to be specified regardless of @unit_size and swaps the positions of @dyn_size and @unit_size so that the parameter order makes more sense (static, reserved and dyn sizes followed by enclosing unit_size). While at it, add @unit_size >= PCPU_MIN_UNIT_SIZE sanity check. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 54a968b4b924..fb455dcc59c7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -107,7 +107,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); -- cgit v1.2.3 From 66c3a75772247c31feabefb724e082220a1ab060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: generalize embedding first chunk setup helper Impact: code reorganization Separate out embedding first chunk setup helper from x86 embedding first chunk allocator and put it in mm/percpu.c. This will be used by the default percpu first chunk allocator and possibly by other archs. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb455dcc59c7..ee5615d65211 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -111,6 +111,10 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); +extern ssize_t __init pcpu_embed_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's -- cgit v1.2.3 From ae46141ff08f1965b17c531b571953c39ce8b9e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Mar 2009 20:33:18 -0400 Subject: NFSv3: Fix posix ACL code Fix a memory leak due to allocation in the XDR layer. In cases where the RPC call needs to be retransmitted, we end up allocating new pages without clearing the old ones. Fix this by moving the allocation into nfs3_proc_setacls(). Also fix an issue discovered by Kevin Rudd, whereby the amount of memory reserved for the acls in the xdr_buf->head was miscalculated, and causing corruption. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ include/linux/nfsacl.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a550b528319f..2e5f00066afd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -406,6 +406,8 @@ struct nfs3_setaclargs { int mask; struct posix_acl * acl_access; struct posix_acl * acl_default; + size_t len; + unsigned int npages; struct page ** pages; }; diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 54487a99beb8..43011b69297c 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -37,6 +37,9 @@ #define NFSACL_MAXPAGES ((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \ >> PAGE_SHIFT) +#define NFS_ACL_MAX_ENTRIES_INLINE (5) +#define NFS_ACL_INLINE_BUFSIZE ((2*(2+3*NFS_ACL_MAX_ENTRIES_INLINE)) << 2) + static inline unsigned int nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default) { -- cgit v1.2.3 From 78851e1aa4c3b796d5f0bb11b445016726302b44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Mar 2009 20:33:19 -0400 Subject: NLM: Shrink the IPv4-only version of nlm_cmp_addr() Clean up/micro-optimatization: Make the AF_INET-only version of nlm_cmp_addr() smaller. This matches the style of nlm_privileged_requester(), and makes the AF_INET-only version of nlm_cmp_addr() nearly the same size as it was before IPv6 support. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index aa6fe7026de7..51855dfd8adb 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -346,6 +346,7 @@ static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { @@ -353,6 +354,13 @@ static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return 0; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /* * Compare two host addresses -- cgit v1.2.3 From 76e6eee03353f01bfca707d4dbb1f10a4ee27dc0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Mar 2009 14:35:43 -0600 Subject: cpumask: tsk_cpumask for accessing the struct task_struct's cpus_allowed. This allows us to change the representation (to a dangling bitmap or cpumask_var_t) without breaking all the callers: they can use tsk_cpumask() now and won't see a difference as the changes roll into linux-next. Signed-off-by: Rusty Russell --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8c216e057c94..011db2f4c94c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1419,6 +1419,9 @@ struct task_struct { #endif }; +/* Future-safe accessor for struct task_struct's cpus_allowed. */ +#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH -- cgit v1.2.3 From 45e575ab9bfada5a5ef1b6174f8e749b1ecf0864 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 12 Mar 2009 14:35:44 -0600 Subject: cpumask: mm_cpumask for accessing the struct mm_struct's cpu_vm_mask. This allows us to change the representation (to a dangling bitmap or cpumask_var_t) without breaking all the callers: they can use mm_cpumask() now and won't see a difference as the changes roll into linux-next. Signed-off-by: Rusty Russell --- include/linux/mm_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 92915e81443f..d84feb7bdbf0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -276,4 +276,7 @@ struct mm_struct { #endif }; +/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ +#define mm_cpumask(mm) (&(mm)->cpu_vm_mask) + #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 446c92b2901bedb3725d29b4e73def8aba623ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 12 Mar 2009 18:03:16 +0100 Subject: [ARM] 5421/1: ftrace: fix crash due to tracing of __naked functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a fix for the following crash observed in 2.6.29-rc3: http://lkml.org/lkml/2009/1/29/150 On ARM it doesn't make sense to trace a naked function because then mcount is called without stack and frame pointer being set up and there is no chance to restore the lr register to the value before mcount was called. Reported-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Cc: Abhishek Sagar Cc: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Uwe Kleine-König Signed-off-by: Russell King --- include/linux/compiler-gcc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1514d534deeb..a3ed7cb8ca34 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -52,7 +52,15 @@ #define __deprecated __attribute__((deprecated)) #define __packed __attribute__((packed)) #define __weak __attribute__((weak)) -#define __naked __attribute__((naked)) + +/* + * it doesn't make sense on ARM (currently the only user of __naked) to trace + * naked functions because then mcount is called without stack and frame pointer + * being set up and there is no chance to restore the lr register to the value + * before mcount was called. + */ +#define __naked __attribute__((naked)) notrace + #define __noreturn __attribute__((noreturn)) /* -- cgit v1.2.3 From 4bb9c5c02153dfc89a6c73a6f32091413805ad7d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 12 Mar 2009 17:45:27 -0700 Subject: VM, x86, PAT: Change is_linear_pfn_mapping to not use vm_pgoff Impact: fix false positive PAT warnings - also fix VirtalBox hang Use of vma->vm_pgoff to identify the pfnmaps that are fully mapped at mmap time is broken. vm_pgoff is set by generic mmap code even for cases where drivers are setting up the mappings at the fault time. The problem was originally reported here: http://marc.info/?l=linux-kernel&m=123383810628583&w=2 Change is_linear_pfn_mapping logic to overload VM_INSERTPAGE flag along with VM_PFNMAP to mean full PFNMAP setup at mmap time. Problem also tracked at: http://bugzilla.kernel.org/show_bug.cgi?id=12800 Reported-by: Thomas Hellstrom Tested-by: Frans Pop Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha @intel.com> Cc: Nick Piggin Cc: "ebiederm@xmission.com" Cc: # only for 2.6.29.1, not .28 LKML-Reference: <20090313004527.GA7176@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 065cdf8c09fb..3daa05feed9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,7 +98,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. Refer note in VM_PFNMAP_AT_MMAP below */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ @@ -126,6 +126,17 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +/* + * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). + * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. + * Note VM_INSERTPAGE flag is overloaded here. i.e, + * VM_INSERTPAGE && !VM_PFNMAP implies + * The vma has had "vm_insert_page()" done on it + * VM_INSERTPAGE && VM_PFNMAP implies + * The vma is PFNMAP with full mapping at mmap time + */ +#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -145,7 +156,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); + return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) -- cgit v1.2.3 From 5d82720a7f41f0c877e026c7d17e3bf20ccdbae0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 13 Mar 2009 21:16:13 +0100 Subject: ide: save the returned value of dma_map_sg dma_map_sg could return a value different to 'nents' argument of dma_map_sg so the ide stack needs to save it for the later usage (e.g. for_each_sg). The ide stack also needs to save the original sg_nents value for pci_unmap_sg. Signed-off-by: FUJITA Tomonori [bart: backport to Linus' tree] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e0cedfe9fad4..25087aead657 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -797,6 +797,7 @@ typedef struct hwif_s { struct scatterlist *sg_table; int sg_max_nents; /* Maximum number of entries in it */ int sg_nents; /* Current number of entries in it */ + int orig_sg_nents; int sg_dma_direction; /* dma transfer direction */ /* data phase of the active command (currently only valid for PIO/DMA) */ -- cgit v1.2.3 From 895791dac6946d535991edd11341046f8e85ea77 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 13 Mar 2009 16:35:44 -0700 Subject: VM, x86, PAT: add a new vm flag to track full pfnmap at mmap Impact: cleanup Add a new vm flag VM_PFN_AT_MMAP to identify a PFNMAP that is fully mapped with remap_pfn_range. Patch removes the overloading of VM_INSERTPAGE from the earlier patch. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Nick Piggin LKML-Reference: <20090313233543.GA19909@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3daa05feed9f..b1ea37fc7a24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,12 +98,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. Refer note in VM_PFNMAP_AT_MMAP below */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ +#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -126,17 +127,6 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) -/* - * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). - * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. - * Note VM_INSERTPAGE flag is overloaded here. i.e, - * VM_INSERTPAGE && !VM_PFNMAP implies - * The vma has had "vm_insert_page()" done on it - * VM_INSERTPAGE && VM_PFNMAP implies - * The vma is PFNMAP with full mapping at mmap time - */ -#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) - /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -156,7 +146,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); + return (vma->vm_flags & VM_PFN_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) -- cgit v1.2.3 From 87092698c665e0a358caf9825ae13114343027e8 Mon Sep 17 00:00:00 2001 From: un'ichi Nomura Date: Mon, 9 Mar 2009 10:40:52 +0100 Subject: block: Add gfp_mask parameter to bio_integrity_clone() Stricter gfp_mask might be required for clone allocation. For example, request-based dm may clone bio in interrupt context so it has to use GFP_ATOMIC. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Acked-by: Martin K. Petersen Cc: Alasdair G Kergon Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1b16108a5417..d8bd43bfdcf5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -531,7 +531,7 @@ extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); -extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *); +extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); extern void bio_integrity_init_slab(void); @@ -542,7 +542,7 @@ extern void bio_integrity_init_slab(void); #define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c) (0) +#define bio_integrity_clone(a, b, c,d ) (0) #define bioset_integrity_free(a) do { } while (0) #define bio_integrity_free(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) -- cgit v1.2.3 From 9d783ba042771284fb4ee5013c3d94220755ae7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:55 -0700 Subject: x86, x2apic: enable fault handling for intr-remapping Impact: interface augmentation (not yet used) Enable fault handling flow for intr-remapping aswell. Fault handling code now shared by both dma-remapping and intr-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/dmar.h | 5 ++--- include/linux/intel-iommu.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f28440784cf0..c7768330c11d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -49,6 +49,7 @@ extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ extern void detect_intel_iommu(void); +extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); @@ -116,9 +117,6 @@ extern struct intel_iommu *map_ioapic_to_ir(int apic); #define intr_remapping_enabled (0) #endif -#ifdef CONFIG_DMAR -extern const char *dmar_get_fault_reason(u8 fault_reason); - /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ @@ -129,6 +127,7 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); +#ifdef CONFIG_DMAR extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; struct dmar_rmrr_unit { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d2e3cbfba14f..a9563840644b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -292,6 +292,8 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + unsigned int irq; + unsigned char name[13]; /* Device Name */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -299,8 +301,6 @@ struct intel_iommu { spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ - unsigned int irq; - unsigned char name[7]; /* Device Name */ struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit v1.2.3 From eba67e5da6e971993b2899d2cdf459ce77d3dbc5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:56 -0700 Subject: x86, dmar: routines for disabling queued invalidation and intr remapping Impact: new interfaces (not yet used) Routines for disabling queued invalidation and interrupt remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a9563840644b..78c1262e8704 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -321,6 +321,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void dmar_disable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, -- cgit v1.2.3 From 1531a6a6b81a4e6f9eec9a5608758a6ea14b96e0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:57 -0700 Subject: x86, dmar: start with sane state while enabling dma and interrupt-remapping Impact: cleanup/sanitization Start from a sane state while enabling dma and interrupt-remapping, by clearing the previous recorded faults and disabling previously enabled queued invalidation and interrupt-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/dmar.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c7768330c11d..8a035aec14a9 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -24,6 +24,7 @@ #include #include #include +#include #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; @@ -125,6 +126,7 @@ extern void dmar_msi_mask(unsigned int irq); extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); +extern irqreturn_t dmar_fault(int irq, void *dev_id); extern int arch_setup_dmar_msi(unsigned int irq); #ifdef CONFIG_DMAR -- cgit v1.2.3 From 29b61be65a33c95564fa82e7e8d60d97adb68ea8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:02 -0700 Subject: x86, x2apic: cleanup ifdef CONFIG_INTR_REMAP in io_apic code Impact: cleanup Clean up #ifdefs and replace them with helper functions. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/dmar.h | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a035aec14a9..2f3427468956 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,9 +26,8 @@ #include #include -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; - +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -52,7 +51,6 @@ extern int dmar_dev_scope_init(void); extern void detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); - extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else @@ -65,12 +63,12 @@ static inline int dmar_table_init(void) { return -ENODEV; } +static inline int enable_drhd_fault_handling(void) +{ + return -1; +} #endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ -#ifdef CONFIG_INTR_REMAP -extern int intr_remapping_enabled; -extern int enable_intr_remapping(int); - struct irte { union { struct { @@ -99,6 +97,10 @@ struct irte { __u64 high; }; }; +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + extern int get_irte(int irq, struct irte *entry); extern int modify_irte(int irq, struct irte *irte_modified); extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); @@ -113,6 +115,35 @@ extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else +static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + return -1; +} +static inline int modify_irte(int irq, struct irte *irte_modified) +{ + return -1; +} +static inline int free_irte(int irq) +{ + return -1; +} +static inline int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + return -1; +} +static inline int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle) +{ + return -1; +} +static inline struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + return NULL; +} +static inline struct intel_iommu *map_ioapic_to_ir(int apic) +{ + return NULL; +} #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) -- cgit v1.2.3