From 1f5026a7e21e409c2b9dd54f6dfb9446511fb7c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:09 +0200 Subject: memblock: Kill MEMBLOCK_ERROR 25818f0f28 (memblock: Make MEMBLOCK_ERROR be 0) thankfully made MEMBLOCK_ERROR 0 and there already are codes which expect error return to be 0. There's no point in keeping MEMBLOCK_ERROR around. End its misery. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-6-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7525e38c434d..d235ec5fe678 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,8 +2,6 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ -#define MEMBLOCK_ERROR 0 - #ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. @@ -164,7 +162,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { - return MEMBLOCK_ERROR; + return 0; } #endif /* CONFIG_HAVE_MEMBLOCK */ -- cgit v1.2.3 From fc769a8e70a3348d5de49e5f69f6aff810157360 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:10 +0200 Subject: memblock: Replace memblock_find_base() with memblock_find_in_range() memblock_find_base() is a static function with two callers in memblock.c and memblock_find_in_range() is a wrapper around it which just changes the types and order of parameters. Make memblock_find_in_range() take phys_addr_t instead of u64 for consistency and replace memblock_find_base() with it. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-7-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d235ec5fe678..349688899cb0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,7 +46,8 @@ extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align); int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); -- cgit v1.2.3 From 5dfe8660a3d7f1ee1265c3536433ee53da3f98a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 09:46:10 +0200 Subject: bootmem: Replace work_with_active_regions() with for_each_mem_pfn_range() Callback based iteration is cumbersome and much less useful than for_each_*() iterator. This patch implements for_each_mem_pfn_range() which replaces work_with_active_regions(). All the current users of work_with_active_regions() are converted. This simplifies walking over early_node_map and will allow converting internal logics in page_alloc to use iterator instead of walking early_node_map directly, which in turn will enable moving node information to memblock. powerpc change is only compile tested. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714074610.GD3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c70a326b8f26..57e4c9ffdff8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1327,9 +1327,27 @@ int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); u64 __init find_memory_core_early(int nid, u64 size, u64 align, u64 goal, u64 limit); -typedef int (*work_fn_t)(unsigned long, unsigned long, void *); -extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); + +extern void __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid); + +/** + * for_each_mem_pfn_range - early memory pfn range iterator + * @i: an integer used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to ulong for start pfn of the range, can be %NULL + * @p_end: ptr to ulong for end pfn of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over configured memory ranges. Available after early_node_map is + * populated. + */ +#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) + #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ -- cgit v1.2.3 From f9b18db3b1cedc75e5d002a4d7097891c3399736 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:32 +0200 Subject: memblock: Don't allow archs to override memblock_nid_range() memblock_nid_range() is used to implement memblock_[try_]alloc_nid(). The generic version determines the range by walking early_node_map with for_each_mem_pfn_range(). The generic version is defined __weak to allow arch override. Currently, only sparc overrides it; however, with the previous update to the generic implementation, there isn't much to be gained with arch override. Sparc would behave exactly the same with the generic implementation. This patch disallows arch override for memblock_nid_range() and make both generic and sparc versions static. sparc is only compile tested. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-6-git-send-email-tj@kernel.org Cc: "David S. Miller" Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 349688899cb0..329ffb26c1c9 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -89,7 +89,6 @@ extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); /* Provided by the architecture */ -extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, phys_addr_t addr2, phys_addr_t size2); -- cgit v1.2.3 From e64980405cc6aa74ef178d8d9aa4018c867ceed1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:34 +0200 Subject: memblock: Separate out memblock_find_in_range_node() Node affine memblock allocation logic is currently implemented across memblock_alloc_nid() and memblock_alloc_nid_region(). This reorganizes it such that it resembles that of non-NUMA allocation API. Area finding is collected and moved into new exported function memblock_find_in_range_node() which is symmetrical to non-NUMA counterpart - it handles @start/@end and understands ANYWHERE and ACCESSIBLE. memblock_alloc_nid() now simply calls memblock_find_in_range_node() and reserves the returned area. This makes memblock_alloc[_try]_nid() observe ACCESSIBLE limit on node affine allocations too (again, this doesn't make any difference for the current sole user - sparc64). Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-8-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 329ffb26c1c9..7400d029df48 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,10 @@ extern long memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ +extern phys_addr_t memblock_find_in_range_node(phys_addr_t start, + phys_addr_t end, + phys_addr_t size, + phys_addr_t align, int nid); extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, -- cgit v1.2.3 From eb40c4c27f1722f058e4713ccfedebac577d5190 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:35 +0200 Subject: memblock, x86: Replace memblock_x86_find_in_range_node() with generic memblock calls With the previous changes, generic NUMA aware memblock API has feature parity with memblock_x86_find_in_range_node(). There currently are two users - x86 setup_node_data() and __alloc_memory_core_early() in nobootmem.c. This patch converts the former to use memblock_alloc_nid() and the latter memblock_find_range_in_node(), and kills memblock_x86_find_in_range_node() and related functions including find_memory_early_core_early() in page_alloc.c. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-9-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 57e4c9ffdff8..9ebc65ae6863 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1325,8 +1325,6 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); -u64 __init find_memory_core_early(int nid, u64 size, u64 align, - u64 goal, u64 limit); extern void sparse_memory_present_with_active_regions(int nid); extern void __next_mem_pfn_range(int *idx, int nid, -- cgit v1.2.3 From ed7b56a799cade11f458cd83e1150af54a66b7e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:15:54 +0200 Subject: memblock: Remove memblock_memory_can_coalesce() Arch could implement memblock_memor_can_coalesce() to veto merging of adjacent or overlapping memblock regions; however, no arch did and any vetoing would trigger WARN_ON(). Memblock regions are supposed to deal with proper memory anyway. Remove the unused hook. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-2-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7400d029df48..aa5df9e11fff 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -92,10 +92,6 @@ extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); -/* Provided by the architecture */ -extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, - phys_addr_t addr2, phys_addr_t size2); - /** * memblock_set_current_limit - Set the current allocation limit to allow * limiting allocations to what is currently -- cgit v1.2.3 From 67e24bcb725cabd15ef577bf301275d03d6086d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:42:03 +0200 Subject: memblock: Use __meminit[data] instead of __init[data] From 19ab281ed67b87a6623d725237a7333ca79f1e75 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:16 +0200 memblock will be extended to include early_node_map[], which is also used during memory hotplug. Make memblock use __meminit[data] instead of __init[data] so that memory hotplug code can safely reference it. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094203.GE3455@htj.dyndns.org Reported-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index aa5df9e11fff..434b958a4f5f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -152,8 +152,8 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #ifdef ARCH_DISCARD_MEMBLOCK -#define __init_memblock __init -#define __initdata_memblock __initdata +#define __init_memblock __meminit +#define __initdata_memblock __meminitdata #else #define __init_memblock #define __initdata_memblock -- cgit v1.2.3 From 7c0caeb866b0f648d91bb75b8bc6f86af95bb033 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:43:42 +0200 Subject: memblock: Add optional region->nid From 83103b92f3234ec830852bbc5c45911bd6cbdb20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:16 +0200 Add optional region->nid which can be enabled by arch using CONFIG_HAVE_MEMBLOCK_NODE_MAP. When enabled, memblock also carries NUMA node information and replaces early_node_map[]. Newly added memblocks have MAX_NUMNODES as nid. Arch can then call memblock_set_node() to set node information. memblock takes care of merging and node affine allocations w.r.t. node information. When MEMBLOCK_NODE_MAP is enabled, early_node_map[], related data structures and functions to manipulate and iterate it are disabled. memblock version of __next_mem_pfn_range() is provided such that for_each_mem_pfn_range() behaves the same and its users don't have to be updated. -v2: Yinghai spotted section mismatch caused by missing __init_memblock in memblock_set_node(). Fixed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094342.GF3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 26 ++++++++++++++++++++++++++ include/linux/mm.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 434b958a4f5f..c36a55d3c1c2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -24,6 +24,9 @@ struct memblock_region { phys_addr_t base; phys_addr_t size; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + int nid; +#endif }; struct memblock_type { @@ -58,6 +61,29 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long memblock_free(phys_addr_t base, phys_addr_t size); extern long memblock_reserve(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); + +static inline void memblock_set_region_node(struct memblock_region *r, int nid) +{ + r->nid = nid; +} + +static inline int memblock_get_region_node(const struct memblock_region *r) +{ + return r->nid; +} +#else +static inline void memblock_set_region_node(struct memblock_region *r, int nid) +{ +} + +static inline int memblock_get_region_node(const struct memblock_region *r) +{ + return 0; +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 9ebc65ae6863..ceb1e4a1a736 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1307,12 +1307,14 @@ extern void free_area_init_node(int nid, unsigned long * zones_size, * CONFIG_ARCH_POPULATES_NODE_MAP */ extern void free_area_init_nodes(unsigned long *max_zone_pfn); +#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); void sort_node_map(void); +#endif unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); -- cgit v1.2.3 From 35fd0808d7d8d001cd72f112e3bca84664b596a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:15:59 +0200 Subject: memblock: Implement for_each_free_mem_range() Implement for_each_free_mem_range() which iterates over free memory areas according to memblock (memory && !reserved). This will be used to simplify memblock users. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-7-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c36a55d3c1c2..31def584cceb 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,26 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long memblock_free(phys_addr_t base, phys_addr_t size); extern long memblock_reserve(phys_addr_t base, phys_addr_t size); +extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid); + +/** + * for_each_free_mem_range - iterate through free memblock areas + * @i: u64 used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over free (memory && !reserved) areas of memblock. Available as + * soon as memblock is initialized. + */ +#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \ + for (i = 0, \ + __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_free_mem_range(&i, nid, p_start, p_end, p_nid)) + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); -- cgit v1.2.3 From 64a02daacbc880bac1d6b3aeefbcd226a9341fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:16:01 +0200 Subject: memblock, x86: Make free_all_memory_core_early() explicitly free lowmem only nomemblock is currently used only by x86 and on x86_32 free_all_memory_core_early() silently freed only the low mem because get_free_all_memory_range() in arch/x86/mm/memblock.c implicitly limited range to max_low_pfn. Rename free_all_memory_core_early() to free_low_memory_core_early() and make it call __get_free_all_memory_range() and limit the range to max_low_pfn explicitly. This makes things clearer and also is consistent with the bootmem behavior. This leaves get_free_all_memory_range() without any user. Kill it. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-9-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- include/linux/bootmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index ab344a521105..66d3e954eb6c 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -44,7 +44,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, unsigned long endpfn); extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); -unsigned long free_all_memory_core_early(int nodeid); +extern unsigned long free_low_memory_core_early(int nodeid); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern unsigned long free_all_bootmem(void); -- cgit v1.2.3 From c378ddd53f9b8832a46fd4fec050a97fc2269858 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:46:03 +0200 Subject: memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option From 6839454ae63f1eb21e515c10229ca95c22955fec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:17 +0200 Make ARCH_DISCARD_MEMBLOCK a config option so that it can be handled together with other MEMBLOCK options. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094603.GH3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 31def584cceb..2491355bb6e4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -197,7 +197,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region++) -#ifdef ARCH_DISCARD_MEMBLOCK +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata #else -- cgit v1.2.3 From 24aa07882b672fff2da2f5c955759f0bd13d32d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:16:06 +0200 Subject: memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones Other than sanity check and debug message, the x86 specific version of memblock reserve/free functions are simple wrappers around the generic versions - memblock_reserve/free(). This patch adds debug messages with caller identification to the generic versions and replaces x86 specific ones and kills them. arch/x86/include/asm/memblock.h and arch/x86/mm/memblock.c are empty after this change and removed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-14-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2491355bb6e4..90746318cec4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -17,8 +17,6 @@ #include #include -#include - #define INIT_MEMBLOCK_REGIONS 128 struct memblock_region { -- cgit v1.2.3 From a390e85cfe91c346ff4745bcd45ad0a7e7101aa2 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 6 Oct 2011 10:07:44 +0300 Subject: wl12xx: move common init code from bus modules to main Move all common parts from sdio.c and spi.c to main.c, since they now can be handled as part of the platform driver. Signed-off-by: Felipe Balbi [forward-ported, cleaned-up and rephrased commit message] [added a bunch of fixes and a new pdata element] [moved some new code into main.c as well] Signed-off-by: Luciano Coelho --- include/linux/wl12xx.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 4b697395326e..0d6373195d32 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -54,6 +54,9 @@ struct wl12xx_platform_data { int board_ref_clock; int board_tcxo_clock; unsigned long platform_quirks; + bool pwr_in_suspend; + + struct wl1271_if_operations *ops; }; /* Platform does not support level trigger interrupts */ @@ -73,6 +76,6 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data) #endif -const struct wl12xx_platform_data *wl12xx_get_platform_data(void); +struct wl12xx_platform_data *wl12xx_get_platform_data(void); #endif -- cgit v1.2.3 From 0314322d1303065568f33869cbd01d7f7367efc4 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 29 Jul 2011 12:53:20 +0000 Subject: Docs: Pedantry: [Cc]ordic -> CORDIC According to: http://en.wikipedia.org/wiki/CORDIC it stands for: *CO*ordinate *R*otation *DI*gital *C*omputer Signed-off-by: Michael Witten Signed-off-by: Jiri Kosina --- include/linux/cordic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cordic.h b/include/linux/cordic.h index f932093e20c2..686f6bf8157d 100644 --- a/include/linux/cordic.h +++ b/include/linux/cordic.h @@ -36,7 +36,7 @@ struct cordic_iq { * @coord: function output parameter holding the i/q coordinate. * * The function calculates the i/q coordinate for a given angle using - * cordic algorithm. The coordinate consists of a real (i) and an + * CORDIC algorithm. The coordinate consists of a real (i) and an * imaginary (q) part. The real part is essentially the cosine of the * angle and the imaginary part is the sine of the angle. The returned * values are scaled by 2^16 for precision. The range for theta is -- cgit v1.2.3 From c620846967f335aafb874b9043a10a8fdda68a24 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 29 Jul 2011 13:00:47 +0000 Subject: Docs: wording: Insert `the' Signed-off-by: Michael Witten Signed-off-by: Jiri Kosina --- include/linux/cordic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cordic.h b/include/linux/cordic.h index 686f6bf8157d..cf68ca4a508c 100644 --- a/include/linux/cordic.h +++ b/include/linux/cordic.h @@ -35,7 +35,7 @@ struct cordic_iq { * @theta: angle in degrees for which i/q coordinate is to be calculated. * @coord: function output parameter holding the i/q coordinate. * - * The function calculates the i/q coordinate for a given angle using + * The function calculates the i/q coordinate for a given angle using the * CORDIC algorithm. The coordinate consists of a real (i) and an * imaginary (q) part. The real part is essentially the cosine of the * angle and the imaginary part is the sine of the angle. The returned -- cgit v1.2.3 From 466b3ddfbcf4f5ce402a77397630a0fa9ea9ce6b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sun, 30 Oct 2011 16:35:08 +0100 Subject: PCI: Fix compile errors with PCI_ATS and !PCI_IOV The ats and sroiv members of 'struct pci_dev' are required for the ATS code already, even without IOV support compiled in. So depend on ATS here. This is fine with PCI_IOV too because it selects PCI_ATS. Also the prototypes for ATS need to be available for PCI_ATS. Reported-by: Randy Dunlap Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- include/linux/pci-ats.h | 6 +++--- include/linux/pci.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index e3d0b3890249..7ef68724f0f0 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -12,7 +12,7 @@ struct pci_ats { unsigned int is_enabled:1; /* Enable bit is set */ }; -#ifdef CONFIG_PCI_IOV +#ifdef CONFIG_PCI_ATS extern int pci_enable_ats(struct pci_dev *dev, int ps); extern void pci_disable_ats(struct pci_dev *dev); @@ -29,7 +29,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev) return dev->ats && dev->ats->is_enabled; } -#else /* CONFIG_PCI_IOV */ +#else /* CONFIG_PCI_ATS */ static inline int pci_enable_ats(struct pci_dev *dev, int ps) { @@ -50,7 +50,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev) return 0; } -#endif /* CONFIG_PCI_IOV */ +#endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI diff --git a/include/linux/pci.h b/include/linux/pci.h index 337df0d5d5f7..7cda65b5f798 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -338,7 +338,7 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; -#ifdef CONFIG_PCI_IOV +#ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* SR-IOV capability related */ struct pci_dev *physfn; /* the PF this VF is associated with */ -- cgit v1.2.3 From 1788ea6e3b2a58cf4fb00206e362d9caff8d86a7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Nov 2011 13:31:21 -0400 Subject: nfs: when attempting to open a directory, fall back on normal lookup (try #5) commit d953126 changed how nfs_atomic_lookup handles an -EISDIR return from an OPEN call. Prior to that patch, that caused the client to fall back to doing a normal lookup. When that patch went in, the code began returning that error to userspace. The d_revalidate codepath however never had the corresponding change, so it was still possible to end up with a NULL ctx->state pointer after that. That patch caused a regression. When we attempt to open a directory that does not have a cached dentry, that open now errors out with EISDIR. If you attempt the same open with a cached dentry, it will succeed. Fix this by reverting the change in nfs_atomic_lookup and allowing attempts to open directories to fall back to a normal lookup Also, add a NFSv4-specific f_ops->open routine that just returns -ENOTDIR. This should never be called if things are working properly, but if it ever is, then the dprintk may help in debugging. To facilitate this, a new file_operations field is also added to the nfs_rpc_ops struct. Cc: stable@kernel.org Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 3 +++ include/linux/nfs_xdr.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ab2c6343361a..92ecf5585fac 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -410,6 +410,9 @@ extern const struct inode_operations nfs_file_inode_operations; extern const struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_file_operations; +#ifdef CONFIG_NFS_V4 +extern const struct file_operations nfs4_file_operations; +#endif /* CONFIG_NFS_V4 */ extern const struct address_space_operations nfs_file_aops; extern const struct address_space_operations nfs_dir_aops; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c74595ba7094..2a7c533be5dd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1192,6 +1192,7 @@ struct nfs_rpc_ops { const struct dentry_operations *dentry_ops; const struct inode_operations *dir_inode_ops; const struct inode_operations *file_inode_ops; + const struct file_operations *file_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -- cgit v1.2.3 From 11be0b3c18d654a8d5ed441fa9e988193a57c1d2 Mon Sep 17 00:00:00 2001 From: Vsevolod Alekseev Date: Sat, 5 Nov 2011 02:35:28 -0700 Subject: security.h: fix misc typos/grammar errors in comments Fix various typos/grammar errors in include/linux/security.h comments (no code changes). Signed-off-by: Vsevolod Alekseev Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- include/linux/security.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 19d8e04e1688..94c35336b86b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -186,7 +186,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Security module identifier. * * @name: - * A string that acts as a unique identifeir for the LSM with max number + * A string that acts as a unique identifier for the LSM with max number * of characters = SECURITY_NAME_MAX. * * Security hooks for program execution operations. @@ -275,7 +275,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @copy copied data which will be passed to the security module. * Returns 0 if the copy was successful. * @sb_remount: - * Extracts security system specifc mount options and verifys no changes + * Extracts security system specific mount options and verifies no changes * are being made to those options. * @sb superblock being remounted * @data contains the filesystem-specific data. @@ -380,15 +380,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if permission is granted. * @inode_mkdir: * Check permissions to create a new directory in the existing directory - * associated with inode strcture @dir. - * @dir containst the inode structure of parent of the directory to be created. + * associated with inode structure @dir. + * @dir contains the inode structure of parent of the directory to be created. * @dentry contains the dentry structure of new directory. * @mode contains the mode of new directory. * Return 0 if permission is granted. * @path_mkdir: * Check permissions to create a new directory in the existing directory - * associated with path strcture @path. - * @dir containst the path structure of parent of the directory + * associated with path structure @path. + * @dir contains the path structure of parent of the directory * to be created. * @dentry contains the dentry structure of new directory. * @mode contains the mode of new directory. @@ -578,7 +578,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @file contains the file structure. * @cmd contains the operation to perform. * @arg contains the operational arguments. - * Check permission for an ioctl operation on @file. Note that @arg can + * Check permission for an ioctl operation on @file. Note that @arg * sometimes represents a user space pointer; in other cases, it may be a * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. @@ -606,7 +606,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if permission is granted. * @file_fcntl: * Check permission before allowing the file operation specified by @cmd - * from being performed on the file @file. Note that @arg can sometimes + * from being performed on the file @file. Note that @arg sometimes * represents a user space pointer; in other cases, it may be a simple * integer value. When @arg represents a user space pointer, it should * never be used by the security module. @@ -793,7 +793,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * information can be saved using the eff_cap field of the * netlink_skb_parms structure. Also may be used to provide fine * grained control over message transmission. - * @sk associated sock of task sending the message., + * @sk associated sock of task sending the message. * @skb contains the sk_buff structure for the netlink message. * Return 0 if the information was successfully saved and message * is allowed to be transmitted. @@ -1080,9 +1080,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * should free it. * @key points to the key to be queried. * @_buffer points to a pointer that should be set to point to the - * resulting string (if no label or an error occurs). + * resulting string (if no label or an error occurs). * Return the length of the string (including terminating NUL) or -ve if - * an error. + * an error. * May also return 0 (and a NULL buffer pointer) if there is no label. * * Security hooks affecting all System V IPC operations. @@ -1268,7 +1268,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * credentials. * @tsk contains the task_struct for the process. * @cred contains the credentials to use. - * @ns contains the user namespace we want the capability in + * @ns contains the user namespace we want the capability in * @cap contains the capability . * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. @@ -1370,7 +1370,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @ctxlen contains the length of @ctx. * * @inode_getsecctx: - * Returns a string containing all relavent security context information + * Returns a string containing all relevant security context information * * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. -- cgit v1.2.3 From f8beab2bb611d735767871e0e1a12dc6a0def7b1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Oct 2011 23:50:49 +0200 Subject: regmap: Add a reusable irq_chip for regmap based interrupt controllers There seem to be lots of regmap-using devices with very similar interrupt controllers with a small bank of interrupt registers and mask registers with an interrupt per bit. This won't cover everything but it's a good start. Each chip supplies a base for the status registers, a base for the mask registers, an optional base for writing acknowledgements (which may be the same as the status registers) and an array of bits within each of these register banks which indicate the interrupt. There is an assumption that the bit for each interrupt will be the same in each of the register bank. Signed-off-by: Mark Brown --- include/linux/regmap.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 690276a642cf..bd54cecdfdf8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -144,4 +144,51 @@ int regcache_sync(struct regmap *map); void regcache_cache_only(struct regmap *map, bool enable); void regcache_cache_bypass(struct regmap *map, bool enable); +/** + * Description of an IRQ for the generic regmap irq_chip. + * + * @reg_offset: Offset of the status/mask register within the bank + * @mask: Mask used to flag/control the register. + */ +struct regmap_irq { + unsigned int reg_offset; + unsigned int mask; +}; + +/** + * Description of a generic regmap irq_chip. This is not intended to + * handle every possible interrupt controller, but it should handle a + * substantial proportion of those that are found in the wild. + * + * @name: Descriptive name for IRQ controller. + * + * @status_base: Base status register address. + * @mask_base: Base mask register address. + * @ack_base: Base ack address. If zero then the chip is clear on read. + * + * @num_regs: Number of registers in each control bank. + * @irqs: Descriptors for individual IRQs. Interrupt numbers are + * assigned based on the index in the array of the interrupt. + * @num_irqs: Number of descriptors. + */ +struct regmap_irq_chip { + const char *name; + + unsigned int status_base; + unsigned int mask_base; + unsigned int ack_base; + + int num_regs; + + const struct regmap_irq *irqs; + int num_irqs; +}; + +struct regmap_irq_chip_data; + +int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, + int irq_base, struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data); +void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); + #endif -- cgit v1.2.3 From 8ae0d7e8a918e9603748abe9b31984fc5d96abb3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 26 Oct 2011 10:34:22 +0200 Subject: regmap: Track if the register cache is dirty and suppress unneeded syncs Allow drivers to optimise out the register cache sync if they didn't need to do one. If the hardware is desynced from the register cache (by power loss for example) then the driver should call regcache_mark_dirty() to let the core know about this. Signed-off-by: Mark Brown --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 690276a642cf..32043a9749e6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -143,5 +143,6 @@ int regmap_update_bits(struct regmap *map, unsigned int reg, int regcache_sync(struct regmap *map); void regcache_cache_only(struct regmap *map, bool enable); void regcache_cache_bypass(struct regmap *map, bool enable); +void regcache_mark_dirty(struct regmap *map); #endif -- cgit v1.2.3 From 50b776fc71c13663eb7434f634f2b796de5c9885 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 Nov 2011 15:00:03 +0000 Subject: regmap: Rename LZO cache type to compressed Users probably don't care about the specific compression algorithm and we might want to use a different algorithm (snappy being the one I'm thinking of right now) so update the public interface to have a more generic name. Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 32043a9749e6..bebda1481f23 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -25,7 +25,7 @@ enum regcache_type { REGCACHE_NONE, REGCACHE_INDEXED, REGCACHE_RBTREE, - REGCACHE_LZO + REGCACHE_COMPRESSED }; /** -- cgit v1.2.3 From c74d084f914e16e42730bcf625ab3f37a4cae8d4 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 15 Oct 2011 00:14:49 +0200 Subject: mac80211: handle HT PHY BSS membership selector value correctly 802.11n-2009 extends the supported rates element with a magic value which can be used to prevent legacy stations from joining the BSS. However, this magic value is not a rate like the others and the magic can simply be ignored/skipped at this late stage. Signed-off-by: Christian Lamparter --- Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 48363c3c40f8..9789aedb2453 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -770,6 +770,9 @@ struct ieee80211_mgmt { } u; } __attribute__ ((packed)); +/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ +#define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 + /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) -- cgit v1.2.3 From c2e889a7f7947bc346e0a341e793fd5cb471d884 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 2 Nov 2011 23:34:56 +0200 Subject: ieee80211: Define cipher suite selector for WPI-SMS4 This value is used for WPI-SMS4 in ISO/IEC JTC 1 N 9880. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9789aedb2453..ffc073ab3ff8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1555,6 +1555,8 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 +#define WLAN_CIPHER_SUITE_SMS4 0x00147201 + /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X 0x000FAC01 #define WLAN_AKM_SUITE_PSK 0x000FAC02 -- cgit v1.2.3 From 6e3e939f3b1bf8534b32ad09ff199d88800835a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 9 Nov 2011 10:15:42 +0100 Subject: net: add wireless TX status socket option The 802.1X EAPOL handshake hostapd does requires knowing whether the frame was ack'ed by the peer. Currently, we fudge this pretty badly by not even transmitting the frame as a normal data frame but injecting it with radiotap and getting the status out of radiotap monitor as well. This is rather complex, confuses users (mon.wlan0 presence) and doesn't work with all hardware. To get rid of that hack, introduce a real wifi TX status option for data frame transmissions. This works similar to the existing TX timestamping in that it reflects the SKB back to the socket's error queue with a SCM_WIFI_STATUS cmsg that has an int indicating ACK status (0/1). Since it is possible that at some point we will want to have TX timestamping and wifi status in a single errqueue SKB (there's little point in not doing that), redefine SO_EE_ORIGIN_TIMESTAMPING to SO_EE_ORIGIN_TXSTATUS which can collect more than just the timestamp; keep the old constant as an alias of course. Currently the internal APIs don't make that possible, but it wouldn't be hard to split them up in a way that makes it possible. Thanks to Neil Horman for helping me figure out the functions that add the control messages. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/errqueue.h | 3 ++- include/linux/skbuff.h | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 034072cea853..c9f522bd17e4 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -17,7 +17,8 @@ struct sock_extended_err { #define SO_EE_ORIGIN_LOCAL 1 #define SO_EE_ORIGIN_ICMP 2 #define SO_EE_ORIGIN_ICMP6 3 -#define SO_EE_ORIGIN_TIMESTAMPING 4 +#define SO_EE_ORIGIN_TXSTATUS 4 +#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6a6b352326d7..ff7e1306a2d2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -218,6 +218,9 @@ enum { /* device driver supports TX zero-copy buffers */ SKBTX_DEV_ZEROCOPY = 1 << 4, + + /* generate wifi status information (where possible) */ + SKBTX_WIFI_STATUS = 1 << 5, }; /* @@ -352,6 +355,8 @@ typedef unsigned char *sk_buff_data_t; * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport * ports. + * @wifi_acked_valid: wifi_acked was set + * @wifi_acked: whether frame was acked on wifi or not * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -445,10 +450,11 @@ struct sk_buff { #endif __u8 ooo_okay:1; __u8 l4_rxhash:1; + __u8 wifi_acked_valid:1; + __u8 wifi_acked:1; + /* 10/12 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); - /* 0/13 bit hole */ - #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif @@ -2263,6 +2269,15 @@ static inline void skb_tx_timestamp(struct sk_buff *skb) sw_tx_timestamp(skb); } +/** + * skb_complete_wifi_ack - deliver skb with wifi status + * + * @skb: the original outgoing packet + * @acked: ack status + * + */ +void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); + extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); -- cgit v1.2.3 From 6cc00d545a21ed26696f3bda865ebf11eccbf2b5 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 3 Nov 2011 21:11:11 -0700 Subject: mac80211: QoS multicast frames have No Ack policy Previously QoS multicast frames had the Normal Acknowledgment QoS control bits set. This would cause broadcast frames to be discarded by peers with which we have a BA session, since their sequence number would fall outside the allowed range. Set No Ack QoS control bits on multicast QoS frames and filter these in de-aggregation code. Signed-off-by: Thomas Pedersen v2: Use proper QoS Ack Policy ctl field mask (Christian) v3: Clean up conditional (Johannes) Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ffc073ab3ff8..66cedf6eb5c2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -128,6 +128,7 @@ #define IEEE80211_QOS_CTL_ACK_POLICY_NOACK 0x0020 #define IEEE80211_QOS_CTL_ACK_POLICY_NO_EXPL 0x0040 #define IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK 0x0060 +#define IEEE80211_QOS_CTL_ACK_POLICY_MASK 0x0060 /* A-MSDU 802.11n */ #define IEEE80211_QOS_CTL_A_MSDU_PRESENT 0x0080 /* Mesh Control 802.11s */ -- cgit v1.2.3 From 28946da763e8b8d8ffd01ab861b684a4afb4bc3b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Nov 2011 11:18:12 +0100 Subject: nl80211: allow subscribing to unexpected class3 frames To implement AP mode without monitor interfaces we need to be able to send a deauth to stations that send frames without being associated. Enable this by adding a new nl80211 event for such frames that an application can subscribe to. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8049bf77d799..9107adc73e0b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -509,6 +509,16 @@ * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. * + * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP + * (or GO) interface (i.e. hostapd) to ask for unexpected frames to + * implement sending deauth to stations that send unexpected class 3 + * frames. Also used as the event sent by the kernel when such a frame + * is received. + * For the event, the %NL80211_ATTR_MAC attribute carries the TA and + * other attributes like the interface index are present. + * If used as the command it must have an interface index and you can + * only unsubscribe from the event by closing the socket. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -638,6 +648,8 @@ enum nl80211_commands { NL80211_CMD_TDLS_OPER, NL80211_CMD_TDLS_MGMT, + NL80211_CMD_UNEXPECTED_FRAME, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 562a74803f4881772ba2375ec4e5aa0ad90f4caa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Nov 2011 12:39:33 +0100 Subject: nl80211: advertise device AP SME Add the ability to advertise that the device contains the AP SME and what features it can support. There are currently no features in the bitmap -- probe response offload will be advertised by a few patches Arik is working on now (who took over from Guy Eilam) and a device with AP SME will typically implement and require response offload. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9107adc73e0b..ff39e4b234d4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1121,6 +1121,11 @@ enum nl80211_commands { * %NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be * used for asking the driver to perform a TDLS operation. * + * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices + * that have AP support to indicate that they have the AP SME integrated + * with support for the features listed in this attribute, see + * &enum nl80211_ap_sme_features. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1349,6 +1354,8 @@ enum nl80211_attrs { NL80211_ATTR_TDLS_SUPPORT, NL80211_ATTR_TDLS_EXTERNAL_SETUP, + NL80211_ATTR_DEVICE_AP_SME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2662,4 +2669,12 @@ enum nl80211_tdls_operation { NL80211_TDLS_DISABLE_LINK, }; +/* + * enum nl80211_ap_sme_features - device-integrated AP features + * Reserved for future use, no bits are defined in + * NL80211_ATTR_DEVICE_AP_SME yet. +enum nl80211_ap_sme_features { +}; + */ + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 7f6cf311a594c1e7ca8120367dd1d4c685aabff1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Nov 2011 11:18:15 +0100 Subject: nl80211: add API to probe a client When the AP SME in hostapd is used it wants to probe the clients when they have been idle for some time. Add explicit API to support this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ff39e4b234d4..901a70d327d1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -519,6 +519,14 @@ * If used as the command it must have an interface index and you can * only unsubscribe from the event by closing the socket. * + * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface + * by sending a null data frame to it and reporting when the frame is + * acknowleged. This is used to allow timing out inactive clients. Uses + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a + * direct reply with an %NL80211_ATTR_COOKIE that is later used to match + * up the event with the request. The event includes the same data and + * has %NL80211_ATTR_ACK set if the frame was ACKed. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -650,6 +658,8 @@ enum nl80211_commands { NL80211_CMD_UNEXPECTED_FRAME, + NL80211_CMD_PROBE_CLIENT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 5e760230e42cf759bd923457ca2753aacf2e656e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Nov 2011 11:18:17 +0100 Subject: cfg80211: allow registering to beacons Add the ability to register to received beacon frames to allow implementing OLBC logic in userspace. The registration is per wiphy since there's no point in receiving the same frame multiple times. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 901a70d327d1..c29a284c27e6 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -527,6 +527,11 @@ * up the event with the request. The event includes the same data and * has %NL80211_ATTR_ACK set if the frame was ACKed. * + * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from + * other BSSes when any interfaces are in AP mode. This helps implement + * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME + * messages. Note that per PHY only one application may register. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -660,6 +665,8 @@ enum nl80211_commands { NL80211_CMD_PROBE_CLIENT, + NL80211_CMD_REGISTER_BEACONS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From b92ab5d86dafc2b3733c5fdd5def40c8fe7ea7c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Nov 2011 11:18:19 +0100 Subject: cfg80211: add event for unexpected 4addr frames The frames are used by AP/STA WDS mode, and hostapd needs to know when such a frame was received to set up the VLAN appropriately to allow using it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c29a284c27e6..09474ab7de8c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -517,7 +517,13 @@ * For the event, the %NL80211_ATTR_MAC attribute carries the TA and * other attributes like the interface index are present. * If used as the command it must have an interface index and you can - * only unsubscribe from the event by closing the socket. + * only unsubscribe from the event by closing the socket. Subscription + * is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events. + * + * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the + * associated station identified by %NL80211_ATTR_MAC sent a 4addr frame + * and wasn't already in a 4-addr VLAN. The event will be sent similarly + * to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener. * * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface * by sending a null data frame to it and reporting when the frame is @@ -667,6 +673,8 @@ enum nl80211_commands { NL80211_CMD_REGISTER_BEACONS, + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From e247bd9068e3e86c3571147c128883596ace9d05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Nov 2011 11:18:21 +0100 Subject: cfg80211/mac80211: allow management TX to not wait for ACK For probe responses it can be useful to not wait for ACK to avoid retransmissions if the station that sent the probe is already on the next channel, so allow userspace to request not caring about the ACK with a new nl80211 flag. Since mac80211 needs to be updated for the new function prototype anyway implement it right away -- it's just a few lines of code. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 09474ab7de8c..165e16fc7af1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1151,6 +1151,11 @@ enum nl80211_commands { * with support for the features listed in this attribute, see * &enum nl80211_ap_sme_features. * + * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells + * the driver to not wait for an acknowledgement. Note that due to this, + * it will also not give a status callback nor return a cookie. This is + * mostly useful for probe responses to save airtime. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1381,6 +1386,8 @@ enum nl80211_attrs { NL80211_ATTR_DEVICE_AP_SME, + NL80211_ATTR_DONT_WAIT_FOR_ACK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 1f074bd8eb7a4a210a5119cd7220f89da6c7a2c3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 Nov 2011 14:13:33 +0100 Subject: nl80211: advertise socket TX status capability The new wifi socket TX capability should be supported by wifi drivers, let them advertise whether they do or not. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 165e16fc7af1..3152ddfb4294 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -695,6 +695,8 @@ enum nl80211_commands { #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS + /* source-level API compatibility */ #define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG #define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG @@ -1156,6 +1158,9 @@ enum nl80211_commands { * it will also not give a status callback nor return a cookie. This is * mostly useful for probe responses to save airtime. * + * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from + * &enum nl80211_feature_flags and is advertised in wiphy information. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1388,6 +1393,8 @@ enum nl80211_attrs { NL80211_ATTR_DONT_WAIT_FOR_ACK, + NL80211_ATTR_FEATURE_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1422,6 +1429,7 @@ enum nl80211_attrs { #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES #define NL80211_ATTR_KEY NL80211_ATTR_KEY #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -2709,4 +2717,14 @@ enum nl80211_ap_sme_features { }; */ +/** + * enum nl80211_feature_flags - device/driver features + * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back + * TX status to the socket error queue when requested with the + * socket option. + */ +enum nl80211_feature_flags { + NL80211_FEATURE_SK_TX_STATUS = 1 << 0, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From d0985394e7fee6b25a7cc8335d45bc1c1a8ab2d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 10 Nov 2011 09:03:55 +0100 Subject: block: Revert "[SCSI] genhd: add a new attribute "alias" in gendisk" This reverts commit a72c5e5eb738033938ab30d6a634b74d1d060f10. The commit introduced alias for block devices which is intended to be used during logging although actual usage hasn't been committed yet. This approach adds very limited benefit (raw log might be easier to follow) which can be trivially implemented in userland but has a lot of problems. It is much worse than netif renames because it doesn't rename the actual device but just adds conveninence name which isn't used universally or enforced. Everything internal including device lookup and sysfs still uses the internal name and nothing prevents two devices from using conflicting alias - ie. sda can have sdb as its alias. This has been nacked by people working on device driver core, block layer and kernel-userland interface and shouldn't have been upstreamed. Revert it. http://thread.gmane.org/gmane.linux.kernel/1155104 http://thread.gmane.org/gmane.linux.scsi/68632 http://thread.gmane.org/gmane.linux.scsi/69776 Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Kay Sievers Cc: "James E.J. Bottomley" Cc: Nao Nishijima Cc: Alan Cox Cc: Al Viro Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9de31bc98c88..6d18f3531f18 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -21,8 +21,6 @@ #define dev_to_part(device) container_of((device), struct hd_struct, __dev) #define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) -#define alias_name(disk) ((disk)->alias ? (disk)->alias : \ - (disk)->disk_name) extern struct device_type part_type; extern struct kobject *block_depr; @@ -60,7 +58,6 @@ enum { #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 -#define ALIAS_LEN 256 #include #include @@ -166,7 +163,6 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *alias; /* alias name of disk */ char *(*devnode)(struct gendisk *gd, mode_t *mode); unsigned int events; /* supported events */ -- cgit v1.2.3 From d65670a78cdbfae94f20a9e05ec705871d7cdf2b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 31 Oct 2011 17:06:35 -0400 Subject: clocksource: Avoid selecting mult values that might overflow when adjusted For some frequencies, the clocks_calc_mult_shift() function will unfortunately select mult values very close to 0xffffffff. This has the potential to overflow when NTP adjusts the clock, adding to the mult value. This patch adds a clocksource.maxadj value, which provides an approximation of an 11% adjustment(NTP limits adjustments to 500ppm and the tick adjustment is limited to 10%), which could be made to the clocksource.mult value. This is then used to both check that the current mult value won't overflow/underflow, as well as warning us if the timekeeping_adjust() code pushes over that 11% boundary. v2: Fix max_adjustment calculation, and improve WARN_ONCE messages. v3: Don't warn before maxadj has actually been set CC: Yong Zhang CC: David Daney CC: Thomas Gleixner CC: Chen Jie CC: zhangfx CC: stable@kernel.org Reported-by: Chen Jie Reported-by: zhangfx Tested-by: Yong Zhang Signed-off-by: John Stultz --- include/linux/clocksource.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 139c4db55f17..c86c940d1de3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) + * @maxadj maximum adjustment value to mult (~11%) * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary @@ -172,7 +173,7 @@ struct clocksource { u32 mult; u32 shift; u64 max_idle_ns; - + u32 maxadj; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif -- cgit v1.2.3 From 87bbbe22f84b91d0bcd3a7fc638e4f5e8224cc4e Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 10 Nov 2011 11:28:55 +0200 Subject: nl80211: Add probe response offload attribute Notify user-space about probe-response offloading support in the driver. A wiphy flag is used to indicate support and a bitmap of protocols determines which protocols are supported. Signed-off-by: Guy Eilam Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- include/linux/nl80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 3152ddfb4294..be92333cf8fe 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1160,6 +1160,11 @@ enum nl80211_commands { * * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from * &enum nl80211_feature_flags and is advertised in wiphy information. + * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe + * + * requests while operating in AP-mode. + * This attribute holds a bitmap of the supported protocols for + * offloading (see &enum nl80211_probe_resp_offload_support_attr). * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1395,6 +1400,8 @@ enum nl80211_attrs { NL80211_ATTR_FEATURE_FLAGS, + NL80211_ATTR_PROBE_RESP_OFFLOAD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2727,4 +2734,25 @@ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, }; +/** + * enum nl80211_probe_resp_offload_support_attr - optional supported + * protocols for probe-response offloading by the driver/FW. + * To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute. + * Each enum value represents a bit in the bitmap of supported + * protocols. Typically a subset of probe-requests belonging to a + * supported protocol will be excluded from offload and uploaded + * to the host. + * + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u + */ +enum nl80211_probe_resp_offload_support_attr { + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS = 1<<0, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 = 1<<1, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P = 1<<2, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U = 1<<3, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 00f740e1a3b7abb51980371ee8fa113df22ae0b8 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 10 Nov 2011 11:28:56 +0200 Subject: nl80211: Pass probe response data to drivers Pass probe-response data from usermode via beacon parameters. Signed-off-by: Guy Eilam Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index be92333cf8fe..f9261c253735 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1166,6 +1166,10 @@ enum nl80211_commands { * This attribute holds a bitmap of the supported protocols for * offloading (see &enum nl80211_probe_resp_offload_support_attr). * + * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire + * probe-response frame. The DA field in the 802.11 header is zero-ed out, + * to be filled by the FW. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1402,6 +1406,8 @@ enum nl80211_attrs { NL80211_ATTR_PROBE_RESP_OFFLOAD, + NL80211_ATTR_PROBE_RESP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 224736d9113ab4a7cf3f05c05377492bd99b4b02 Mon Sep 17 00:00:00 2001 From: Stratos Psomadakis Date: Thu, 10 Nov 2011 15:45:37 +0200 Subject: libceph: Allocate larger oid buffer in request msgs ceph_osd_request struct allocates a 40-byte buffer for object names. RBD image names can be up to 96 chars long (100 with the .rbd suffix), which results in the object name for the image being truncated, and a subsequent map failure. Increase the oid buffer in request messages, in order to avoid the truncation. Signed-off-by: Stratos Psomadakis Signed-off-by: Sage Weil --- include/linux/ceph/osd_client.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f88eacb111d4..7c05ac202d90 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,12 @@ #include "osdmap.h" #include "messenger.h" +/* + * Maximum object name size + * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100) + */ +#define MAX_OBJ_NAME_SIZE 100 + struct ceph_msg; struct ceph_snap_context; struct ceph_osd_request; @@ -75,7 +81,7 @@ struct ceph_osd_request { struct inode *r_inode; /* for use by callbacks */ void *r_priv; /* ditto */ - char r_oid[40]; /* object name */ + char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; unsigned long r_stamp; /* send OR check time */ -- cgit v1.2.3 From 3d249d4ca7d0ed6629a135ea1ea21c72286c0d80 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 11 Nov 2011 22:16:48 +0000 Subject: net: introduce ethernet teaming device This patch introduces new network device called team. It supposes to be very fast, simple, userspace-driven alternative to existing bonding driver. Userspace library called libteam with couple of demo apps is available here: https://github.com/jpirko/libteam Note it's still in its dipers atm. team<->libteam use generic netlink for communication. That and rtnl suppose to be the only way to configure team device, no sysfs etc. Python binding of libteam was recently introduced. Daemon providing arpmon/miimon active-backup functionality will be introduced shortly. All what's necessary is already implemented in kernel team driver. v7->v8: - check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling them. - use dev_kfree_skb_any() instead of dev_kfree_skb() v6->v7: - transmit and receive functions are not checked in hot paths. That also resolves memory leak on transmit when no port is present v5->v6: - changed couple of _rcu calls to non _rcu ones in non-readers v4->v5: - team_change_mtu() uses team->lock while travesing though port list - mac address changes are moved completely to jurisdiction of userspace daemon. This way the daemon can do FOM1, FOM2 and possibly other weird things with mac addresses. Only round-robin mode sets up all ports to bond's address then enslaved. - Extended Kconfig text v3->v4: - remove redundant synchronize_rcu from __team_change_mode() - revert "set and clear of mode_ops happens per pointer, not per byte" - extend comment of function __team_change_mode() v2->v3: - team_change_mtu() uses rcu version of list traversal to unwind - set and clear of mode_ops happens per pointer, not per byte - port hashlist changed to be embedded into team structure - error branch in team_port_enter() does cleanup now - fixed rtln->rtnl v1->v2: - modes are made as modules. Makes team more modular and extendable. - several commenters' nitpicks found on v1 were fixed - several other bugs were fixed. - note I ignored Eric's comment about roundrobin port selector as Eric's way may be easily implemented as another mode (mode "random") in future. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/if.h | 1 + include/linux/if_team.h | 242 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 include/linux/if_team.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 619b5657af77..0b091b32267d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -185,6 +185,7 @@ header-y += if_pppol2tp.h header-y += if_pppox.h header-y += if_slip.h header-y += if_strip.h +header-y += if_team.h header-y += if_tr.h header-y += if_tun.h header-y += if_tunnel.h diff --git a/include/linux/if.h b/include/linux/if.h index db20bd4fd16b..06b6ef60c821 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -79,6 +79,7 @@ #define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ +#define IFF_TEAM_PORT 0x40000 /* device used as team port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_team.h b/include/linux/if_team.h new file mode 100644 index 000000000000..14f6388f5460 --- /dev/null +++ b/include/linux/if_team.h @@ -0,0 +1,242 @@ +/* + * include/linux/if_team.h - Network team device driver header + * Copyright (c) 2011 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _LINUX_IF_TEAM_H_ +#define _LINUX_IF_TEAM_H_ + +#ifdef __KERNEL__ + +struct team_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_dropped; + u32 tx_dropped; +}; + +struct team; + +struct team_port { + struct net_device *dev; + struct hlist_node hlist; /* node in hash list */ + struct list_head list; /* node in ordinary list */ + struct team *team; + int index; + + /* + * A place for storing original values of the device before it + * become a port. + */ + struct { + unsigned char dev_addr[MAX_ADDR_LEN]; + unsigned int mtu; + } orig; + + bool linkup; + u32 speed; + u8 duplex; + + struct rcu_head rcu; +}; + +struct team_mode_ops { + int (*init)(struct team *team); + void (*exit)(struct team *team); + rx_handler_result_t (*receive)(struct team *team, + struct team_port *port, + struct sk_buff *skb); + bool (*transmit)(struct team *team, struct sk_buff *skb); + int (*port_enter)(struct team *team, struct team_port *port); + void (*port_leave)(struct team *team, struct team_port *port); + void (*port_change_mac)(struct team *team, struct team_port *port); +}; + +enum team_option_type { + TEAM_OPTION_TYPE_U32, + TEAM_OPTION_TYPE_STRING, +}; + +struct team_option { + struct list_head list; + const char *name; + enum team_option_type type; + int (*getter)(struct team *team, void *arg); + int (*setter)(struct team *team, void *arg); +}; + +struct team_mode { + struct list_head list; + const char *kind; + struct module *owner; + size_t priv_size; + const struct team_mode_ops *ops; +}; + +#define TEAM_PORT_HASHBITS 4 +#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS) + +#define TEAM_MODE_PRIV_LONGS 4 +#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) + +struct team { + struct net_device *dev; /* associated netdevice */ + struct team_pcpu_stats __percpu *pcpu_stats; + + spinlock_t lock; /* used for overall locking, e.g. port lists write */ + + /* + * port lists with port count + */ + int port_count; + struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES]; + struct list_head port_list; + + struct list_head option_list; + + const struct team_mode *mode; + struct team_mode_ops ops; + long mode_priv[TEAM_MODE_PRIV_LONGS]; +}; + +static inline struct hlist_head *team_port_index_hash(struct team *team, + int port_index) +{ + return &team->port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; +} + +static inline struct team_port *team_get_port_by_index(struct team *team, + int port_index) +{ + struct hlist_node *p; + struct team_port *port; + struct hlist_head *head = team_port_index_hash(team, port_index); + + hlist_for_each_entry(port, p, head, hlist) + if (port->index == port_index) + return port; + return NULL; +} +static inline struct team_port *team_get_port_by_index_rcu(struct team *team, + int port_index) +{ + struct hlist_node *p; + struct team_port *port; + struct hlist_head *head = team_port_index_hash(team, port_index); + + hlist_for_each_entry_rcu(port, p, head, hlist) + if (port->index == port_index) + return port; + return NULL; +} + +extern int team_port_set_team_mac(struct team_port *port); +extern void team_options_register(struct team *team, + struct team_option *option, + size_t option_count); +extern void team_options_unregister(struct team *team, + struct team_option *option, + size_t option_count); +extern int team_mode_register(struct team_mode *mode); +extern int team_mode_unregister(struct team_mode *mode); + +#endif /* __KERNEL__ */ + +#define TEAM_STRING_MAX_LEN 32 + +/********************************** + * NETLINK_GENERIC netlink family. + **********************************/ + +enum { + TEAM_CMD_NOOP, + TEAM_CMD_OPTIONS_SET, + TEAM_CMD_OPTIONS_GET, + TEAM_CMD_PORT_LIST_GET, + + __TEAM_CMD_MAX, + TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1), +}; + +enum { + TEAM_ATTR_UNSPEC, + TEAM_ATTR_TEAM_IFINDEX, /* u32 */ + TEAM_ATTR_LIST_OPTION, /* nest */ + TEAM_ATTR_LIST_PORT, /* nest */ + + __TEAM_ATTR_MAX, + TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1, +}; + +/* Nested layout of get/set msg: + * + * [TEAM_ATTR_LIST_OPTION] + * [TEAM_ATTR_ITEM_OPTION] + * [TEAM_ATTR_OPTION_*], ... + * [TEAM_ATTR_ITEM_OPTION] + * [TEAM_ATTR_OPTION_*], ... + * ... + * [TEAM_ATTR_LIST_PORT] + * [TEAM_ATTR_ITEM_PORT] + * [TEAM_ATTR_PORT_*], ... + * [TEAM_ATTR_ITEM_PORT] + * [TEAM_ATTR_PORT_*], ... + * ... + */ + +enum { + TEAM_ATTR_ITEM_OPTION_UNSPEC, + TEAM_ATTR_ITEM_OPTION, /* nest */ + + __TEAM_ATTR_ITEM_OPTION_MAX, + TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1, +}; + +enum { + TEAM_ATTR_OPTION_UNSPEC, + TEAM_ATTR_OPTION_NAME, /* string */ + TEAM_ATTR_OPTION_CHANGED, /* flag */ + TEAM_ATTR_OPTION_TYPE, /* u8 */ + TEAM_ATTR_OPTION_DATA, /* dynamic */ + + __TEAM_ATTR_OPTION_MAX, + TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1, +}; + +enum { + TEAM_ATTR_ITEM_PORT_UNSPEC, + TEAM_ATTR_ITEM_PORT, /* nest */ + + __TEAM_ATTR_ITEM_PORT_MAX, + TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1, +}; + +enum { + TEAM_ATTR_PORT_UNSPEC, + TEAM_ATTR_PORT_IFINDEX, /* u32 */ + TEAM_ATTR_PORT_CHANGED, /* flag */ + TEAM_ATTR_PORT_LINKUP, /* flag */ + TEAM_ATTR_PORT_SPEED, /* u32 */ + TEAM_ATTR_PORT_DUPLEX, /* u8 */ + + __TEAM_ATTR_PORT_MAX, + TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1, +}; + +/* + * NETLINK_GENERIC related info + */ +#define TEAM_GENL_NAME "team" +#define TEAM_GENL_VERSION 0x1 +#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" + +#endif /* _LINUX_IF_TEAM_H_ */ -- cgit v1.2.3 From 8b5c171bb3dc0686b2647a84e990199c5faa9ef8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2011 12:07:14 +0000 Subject: neigh: new unresolved queue limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mercredi 09 novembre 2011 à 16:21 -0500, David Miller a écrit : > From: David Miller > Date: Wed, 09 Nov 2011 16:16:44 -0500 (EST) > > > From: Eric Dumazet > > Date: Wed, 09 Nov 2011 12:14:09 +0100 > > > >> unres_qlen is the number of frames we are able to queue per unresolved > >> neighbour. Its default value (3) was never changed and is responsible > >> for strange drops, especially if IP fragments are used, or multiple > >> sessions start in parallel. Even a single tcp flow can hit this limit. > > ... > > > > Ok, I've applied this, let's see what happens :-) > > Early answer, build fails. > > Please test build this patch with DECNET enabled and resubmit. The > decnet neigh layer still refers to the removed ->queue_len member. > > Thanks. Ouch, this was fixed on one machine yesterday, but not the other one I used this morning, sorry. [PATCH V5 net-next] neigh: new unresolved queue limits unres_qlen is the number of frames we are able to queue per unresolved neighbour. Its default value (3) was never changed and is responsible for strange drops, especially if IP fragments are used, or multiple sessions start in parallel. Even a single tcp flow can hit this limit. $ arp -d 192.168.20.108 ; ping -c 2 -s 8000 192.168.20.108 PING 192.168.20.108 (192.168.20.108) 8000(8028) bytes of data. 8008 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.322 ms Signed-off-by: David S. Miller --- include/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index a7003b7a695d..b188f68a08c9 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -116,6 +116,7 @@ enum { NDTPA_PROXY_DELAY, /* u64, msecs */ NDTPA_PROXY_QLEN, /* u32 */ NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) -- cgit v1.2.3 From 06236ac3726f15124839cf16a9e2730a852dad9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 7 Nov 2011 14:23:11 +0000 Subject: net-netlink: Add a new attribute to expose TCLASS values via netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3ceca749668a52bd795585e0f71c6f0b04814f7b added a TOS attribute. Unfortunately TOS and TCLASS are both present in a dual-stack v6 socket, furthermore they can have different values. As such one cannot in a sane way expose both through a single attribute. Signed-off-by: Maciej Żenczyowski CC: Murali Raja CC: Stephen Hemminger CC: Eric Dumazet CC: David S. Miller Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 80b480c97532..abf5028db981 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -98,9 +98,10 @@ enum { INET_DIAG_VEGASINFO, INET_DIAG_CONG, INET_DIAG_TOS, + INET_DIAG_TCLASS, }; -#define INET_DIAG_MAX INET_DIAG_TOS +#define INET_DIAG_MAX INET_DIAG_TCLASS /* INET_DIAG_MEM */ -- cgit v1.2.3 From f1c6f1a7eed963ed233ba4c8b6fa8addb86c6ddc Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Wed, 26 Oct 2011 23:14:16 +0200 Subject: sched: Set the command name of the idle tasks in SMP kernels In UP systems, the idle task is initialized using the init_task structure from which the command name is taken (currently "swapper"). In SMP systems, one idle task per CPU is forked by the worker thread from which the task structure is copied. The command name is, therefore, "kworker/0:0" or "kworker/0:1", if not updated. Since such update was lacking, all idle tasks in SMP systems were incorrectly named. This longtime bug was not discovered immediately, because there is no /proc/0 entry - the bug only becomes apparent when tracing is enabled. This patch sets the command name of the idle tasks in SMP systems to the name that is used in the INIT_TASK structure suffixed by a slash and the number of the CPU. Signed-off-by: Carsten Emde Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111026211708.768925506@osadl.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08ffab01e76c..b6e5b8b000e0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -126,6 +126,8 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif +#define INIT_TASK_COMM "swapper" + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,7 +164,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = "swapper", \ + .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ -- cgit v1.2.3 From 94d24fc47219219b5aa23b45956cc37ee5aa5b01 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Jun 2011 11:17:30 +0200 Subject: printk, lockdep: Disable lock debugging on zap_locks() zap_locks() is used by printk() in a last ditch effort to get data out, clearly we cannot trust lock state after this so make it disable lock debugging. Also don't treat printk recursion through lockdep as a normal recursion bug but try hard to get the lockdep splat out. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kqxwmo4xz37e1s8w0xopvr0q@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b6a56e37284c..d36619ead3ba 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -343,6 +343,8 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) +#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -392,6 +394,8 @@ struct lock_class_key { }; #define lockdep_assert_held(l) do { } while (0) +#define lockdep_recursing(tsk) (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT -- cgit v1.2.3 From b2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Nov 2011 06:03:34 +0000 Subject: net: introduce build_skb() One of the thing we discussed during netdev 2011 conference was the idea to change some network drivers to allocate/populate their skb at RX completion time, right before feeding the skb to network stack. In old days, we allocated skbs when populating the RX ring. This means bringing into cpu cache sk_buff and skb_shared_info cache lines (since we clear/initialize them), then 'queue' skb->data to NIC. By the time NIC fills a frame in skb->data buffer and host can process it, cpu probably threw away the cache lines from its caches, because lot of things happened between the allocation and final use. So the deal would be to allocate only the data buffer for the NIC to populate its RX ring buffer. And use build_skb() at RX completion to attach a data buffer (now filled with an ethernet frame) to a new skb, initialize the skb_shared_info portion, and give the hot skb to network stack. build_skb() is the function to allocate an skb, caller providing the data buffer that should be attached to it. Drivers are expected to call skb_reserve() right after build_skb() to adjust skb->data to the Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN, but some drivers might add a hardware provided alignment) Data provided to build_skb() MUST have been allocated by a prior kmalloc() call, with enough room to add SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) bytes at the end of the data without corrupting incoming frame. data = kmalloc(NET_SKB_PAD + NET_IP_ALIGN + 1536 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), GFP_ATOMIC); ... skb = build_skb(data); if (!skb) { recycle_data(data); } else { skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); ... } Signed-off-by: Eric Dumazet CC: Eilon Greenstein CC: Ben Hutchings CC: Tom Herbert CC: Jamal Hadi Salim CC: Stephen Hemminger CC: Thomas Graf CC: Herbert Xu CC: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe864885c1ed..abad8a0941e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -540,6 +540,7 @@ extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int fclone, int node); +extern struct sk_buff *build_skb(void *data); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { -- cgit v1.2.3 From 7b08fae8fbf0c14f003be8e039ed37bcbae4415a Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 1 Nov 2011 11:15:40 -0700 Subject: device.h: Fix struct member documentation Fix warning of make xmldocs of documention of the struct member iommu_ops from struct bus_type. Signed-off-by: Marcos Paulo de Souza Acked-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ffbcf95cd97d..2b8832060893 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -69,7 +69,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @resume: Called to bring a device on this bus out of sleep mode. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. - * @iommu_ops IOMMU specific operations for this bus, used to attach IOMMU + * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU * driver implementations to a bus and allow the driver to do * bus-specific setup * @p: The private data of the driver core, only the driver core can -- cgit v1.2.3 From 93f3350c46fa5dfcc9650eb19b186e71ffc924c3 Mon Sep 17 00:00:00 2001 From: Claudio Scordino Date: Wed, 9 Nov 2011 15:51:49 +0100 Subject: RS485: fix inconsistencies in the meaning of some variables The crisv10.c and the atmel_serial.c serial drivers intepret the fields of the serial_rs485 structure in a different way. In particular, crisv10.c uses SER_RS485_RTS_AFTER_SEND and SER_RS485_RTS_ON_SEND for the voltage of the RTS pin; atmel_serial.c, instead, uses these values to know if a delay must be set before and after sending. This patch makes the usage of these variables consistent across all drivers and fixes the Documentation as well. From now on, SER_RS485_RTS_AFTER_SEND and SER_RS485_RTS_ON_SEND will be used to set the voltage of the RTS pin (as in the crisv10.c driver); the delay will be understood by looking only at the value of delay_rts_before_send and delay_rts_after_send. Signed-off-by: Claudio Scordino Signed-off-by: Darron Black Acked-by: Jesper Nilsson Acked-by: Nicolas Ferre Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 97ff8e27a6cc..3d86517fe7d5 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -207,13 +207,15 @@ struct serial_icounter_struct { struct serial_rs485 { __u32 flags; /* RS485 feature flags */ -#define SER_RS485_ENABLED (1 << 0) -#define SER_RS485_RTS_ON_SEND (1 << 1) -#define SER_RS485_RTS_AFTER_SEND (1 << 2) -#define SER_RS485_RTS_BEFORE_SEND (1 << 3) +#define SER_RS485_ENABLED (1 << 0) /* If enabled */ +#define SER_RS485_RTS_ON_SEND (1 << 1) /* Logical level for + RTS pin when + sending */ +#define SER_RS485_RTS_AFTER_SEND (1 << 2) /* Logical level for + RTS pin after sent*/ #define SER_RS485_RX_DURING_TX (1 << 4) - __u32 delay_rts_before_send; /* Milliseconds */ - __u32 delay_rts_after_send; /* Milliseconds */ + __u32 delay_rts_before_send; /* Delay before send (milliseconds) */ + __u32 delay_rts_after_send; /* Delay after send (milliseconds) */ __u32 padding[5]; /* Memory is cheap, new structs are a royal PITA .. */ }; -- cgit v1.2.3 From 64882709ef07f3eae29c7afc5aa8b84d12733a72 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 15 Nov 2011 11:54:15 +0000 Subject: mdio-gpio: Add reset functionality to mdio-gpio driver(v2). This patch adds phy reset functionality to mdio-gpio driver. Now mdio_gpio_platform_data has new member as function pointer which can be filled at the bsp level for a callback from phy infrastructure. Also the mdio-bitbang driver fills-in the reset function of mii_bus structure. Without this patch the bsp level code has to takecare of the reseting PHY's on the bus, which become bit hacky for every bsp and phy-infrastructure is ignored aswell. Signed-off-by: Srinivas Kandagatla Signed-off-by: David S. Miller --- include/linux/mdio-bitbang.h | 2 ++ include/linux/mdio-gpio.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index 0fe00cd4c93c..76f52bbbb2f4 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -32,6 +32,8 @@ struct mdiobb_ops { struct mdiobb_ctrl { const struct mdiobb_ops *ops; + /* reset callback */ + int (*reset)(struct mii_bus *bus); }; /* The returned bus is not yet registered with the phy layer. */ diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index e9d3fdfe41d7..7c9fe3c2be73 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -20,6 +20,8 @@ struct mdio_gpio_platform_data { unsigned int phy_mask; int irqs[PHY_MAX_ADDR]; + /* reset callback */ + int (*reset)(struct mii_bus *bus); }; #endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.2.3 From 72f8c0bfa0de64c68ee59f40eb9b2683bffffbb0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 25 Oct 2011 15:16:47 +0200 Subject: lib: devres: add convenience function to remap a resource Almost every platform_driver does the three steps get_resource, request_mem_region, ioremap. This does not only lead to a lot of code duplication, but also a huge number of similar error strings and inconsistent error codes on failure. So, introduce a helper function which simplifies remapping a resource and make it hard to do something wrong and add documentation for it. Signed-off-by: Wolfram Sang Acked-by: Grant Likely Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ffbcf95cd97d..c6335982774c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -490,6 +490,9 @@ extern int devres_release_group(struct device *dev, void *id); extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp); extern void devm_kfree(struct device *dev, void *p); +void __iomem *devm_request_and_ioremap(struct device *dev, + struct resource *res); + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about -- cgit v1.2.3 From 0c614e2d3e6ee6ff13c6181f380787cea1d82d1d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Nov 2011 09:21:48 +0100 Subject: include/linux/bio.h: use a static inline function for bio_integrity_clone() When CONFIG_BLK_DEV_INTEGRITY is not set, we get these warnings: drivers/md/dm.c: In function 'split_bvec': drivers/md/dm.c:1061:3: warning: statement with no effect drivers/md/dm.c: In function 'clone_bio': drivers/md/dm.c:1088:3: warning: statement with no effect Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a3c071c9e189..d2a3cc23d828 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -519,7 +519,11 @@ extern void bio_integrity_init(void); #define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c, d) (0) +static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) +{ + return 0; +} #define bioset_integrity_free(a) do { } while (0) #define bio_integrity_free(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) -- cgit v1.2.3 From 121f099412bd6576dfb3d94222e89d9341362177 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Nov 2011 09:21:50 +0100 Subject: bio: change some signed vars to unsigned This is just a cleanup patch to silence a static checker warning. The problem is that we cap "nr_iovecs" so it can't be larger than "UIO_MAXIOV" but we don't check for negative values. It turns out this is prevented at other layers, but logically it doesn't make sense to have negative nr_iovecs so making it unsigned is nicer. Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d2a3cc23d828..847994aef0e9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -211,8 +211,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(gfp_t, int); -extern struct bio *bio_kmalloc(gfp_t, int); +extern struct bio *bio_alloc(gfp_t, unsigned int); +extern struct bio *bio_kmalloc(gfp_t, unsigned int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); -- cgit v1.2.3 From cd12909cb576d37311fe35868780e82d5007d0c8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 29 Sep 2011 16:53:32 +0100 Subject: xen: map foreign pages for shared rings by updating the PTEs directly When mapping a foreign page with xenbus_map_ring_valloc() with the GNTTABOP_map_grant_ref hypercall, set the GNTMAP_contains_pte flag and pass a pointer to the PTE (in init_mm). After the page is mapped, the usual fault mechanism can be used to update additional MMs. This allows the vmalloc_sync_all() to be removed from alloc_vm_area(). Signed-off-by: David Vrabel Acked-by: Andrew Morton [v1: Squashed fix by Michal for no-mmu case] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Michal Simek --- include/linux/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 687fb11e2010..4bde182fcf93 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -119,7 +119,7 @@ unmap_kernel_range(unsigned long addr, unsigned long size) #endif /* Allocate/destroy a 'vmalloc' VM area. */ -extern struct vm_struct *alloc_vm_area(size_t size); +extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes); extern void free_vm_area(struct vm_struct *area); /* for /dev/kmem */ -- cgit v1.2.3 From 720e4616e8fd85284ef1addd8b8d93d8415e8dbc Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 16 Nov 2011 16:28:17 +0100 Subject: regmap: Make reg_config reg_defaults const The reg_defaults field usually points to a static per driver array, which should not be modified. Make requirement this explicit by making reg_defaults const. To allow this the regcache_init code needs some minor changes. Previoulsy the reg_config was not available in regcache_init and regmap->reg_defaults was used to pass the default register set to regcache_init. Now that the reg_config is available we can work on it directly. Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 1e4ec2b6c2ea..458f15f4c37c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -83,7 +83,7 @@ struct regmap_config { bool (*precious_reg)(struct device *dev, unsigned int reg); unsigned int max_register; - struct reg_default *reg_defaults; + const struct reg_default *reg_defaults; unsigned int num_reg_defaults; enum regcache_type cache_type; const void *reg_defaults_raw; -- cgit v1.2.3 From 66846048f55c6c05a4c46c2daabb773173f8f28d Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Mon, 14 Nov 2011 14:17:08 +0000 Subject: enable virtio_net to return bus_info in ethtool -i consistent with emulated NICs Add a new .bus_name to virtio_config_ops then modify virtio_net to call through to it in an ethtool .get_drvinfo routine to report bus_info in ethtool -i output which is consistent with other emulated NICs and the output of lspci. Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- include/linux/virtio_config.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index add4790b21fe..63f98d0a8efa 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -100,6 +100,10 @@ * vdev: the virtio_device * This gives the final feature bits for the device: it can change * the dev->feature bits if it wants. + * @bus_name: return the bus name associated with the device + * vdev: the virtio_device + * This returns a pointer to the bus name a la pci_name from which + * the caller can then copy. */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -117,6 +121,7 @@ struct virtio_config_ops { void (*del_vqs)(struct virtio_device *); u32 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); + const char *(*bus_name)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ @@ -182,5 +187,14 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, return ERR_PTR(err); return vq; } + +static inline +const char *virtio_bus_name(struct virtio_device *vdev) +{ + if (!vdev->config->bus_name) + return "virtio"; + return vdev->config->bus_name(vdev); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From bc5787c6125cc2c868eaace46c46ce6e83dcfcb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: remove legacy ethtool ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As all drivers are converted, we may now remove discrete offload setting callback handling. Signed-off-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 53 ----------------------------------------------- include/linux/netdevice.h | 16 -------------- 2 files changed, 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de33de1e2052..20db5b275c3f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -724,9 +724,6 @@ enum ethtool_sfeatures_retval_bits { #include -/* needed by dev_disable_lro() */ -extern int __ethtool_set_flags(struct net_device *dev, u32 flags); - extern int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -750,19 +747,6 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -u32 ethtool_op_get_tx_csum(struct net_device *dev); -int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); -int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); -int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data); -u32 ethtool_op_get_sg(struct net_device *dev); -int ethtool_op_set_sg(struct net_device *dev, u32 data); -u32 ethtool_op_get_tso(struct net_device *dev); -int ethtool_op_set_tso(struct net_device *dev, u32 data); -u32 ethtool_op_get_ufo(struct net_device *dev); -int ethtool_op_set_ufo(struct net_device *dev, u32 data); -u32 ethtool_op_get_flags(struct net_device *dev); -int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported); -bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** * struct ethtool_ops - optional netdev operations @@ -807,22 +791,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * @get_pauseparam: Report pause parameters * @set_pauseparam: Set pause parameters. Returns a negative error code * or zero. - * @get_rx_csum: Deprecated in favour of the netdev feature %NETIF_F_RXCSUM. - * Report whether receive checksums are turned on or off. - * @set_rx_csum: Deprecated in favour of generic netdev features. Turn - * receive checksum on or off. Returns a negative error code or zero. - * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums - * are turned on or off. - * @set_tx_csum: Deprecated in favour of generic netdev features. Turn - * transmit checksums on or off. Returns a negative error code or zero. - * @get_sg: Deprecated as redundant. Report whether scatter-gather is - * enabled. - * @set_sg: Deprecated in favour of generic netdev features. Turn - * scatter-gather on or off. Returns a negative error code or zero. - * @get_tso: Deprecated as redundant. Report whether TCP segmentation - * offload is enabled. - * @set_tso: Deprecated in favour of generic netdev features. Turn TCP - * segmentation offload on or off. Returns a negative error code or zero. * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects * @set_phys_id: Identify the physical devices, e.g. by flashing an LED @@ -844,15 +812,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * negative error code or zero. * @complete: Function to be called after any other operation except * @begin. Will be called even if the other operation failed. - * @get_ufo: Deprecated as redundant. Report whether UDP fragmentation - * offload is enabled. - * @set_ufo: Deprecated in favour of generic netdev features. Turn UDP - * fragmentation offload on or off. Returns a negative error code or zero. - * @get_flags: Deprecated as redundant. Report features included in - * &enum ethtool_flags that are enabled. - * @set_flags: Deprecated in favour of generic netdev features. Turn - * features included in &enum ethtool_flags on or off. Returns a - * negative error code or zero. * @get_priv_flags: Report driver-specific feature flags. * @set_priv_flags: Set driver-specific feature flags. Returns a negative * error code or zero. @@ -917,14 +876,6 @@ struct ethtool_ops { struct ethtool_pauseparam*); int (*set_pauseparam)(struct net_device *, struct ethtool_pauseparam*); - u32 (*get_rx_csum)(struct net_device *); - int (*set_rx_csum)(struct net_device *, u32); - u32 (*get_tx_csum)(struct net_device *); - int (*set_tx_csum)(struct net_device *, u32); - u32 (*get_sg)(struct net_device *); - int (*set_sg)(struct net_device *, u32); - u32 (*get_tso)(struct net_device *); - int (*set_tso)(struct net_device *, u32); void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state); @@ -932,10 +883,6 @@ struct ethtool_ops { struct ethtool_stats *, u64 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); - u32 (*get_ufo)(struct net_device *); - int (*set_ufo)(struct net_device *, u32); - u32 (*get_flags)(struct net_device *); - int (*set_flags)(struct net_device *, u32); u32 (*get_priv_flags)(struct net_device *); int (*set_priv_flags)(struct net_device *, u32); int (*get_sset_count)(struct net_device *, int); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cbeb5867cff7..e34717a792b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2592,22 +2592,6 @@ static inline int netif_is_bond_slave(struct net_device *dev) extern struct pernet_operations __net_initdata loopback_net_ops; -static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) -{ - if (dev->features & NETIF_F_RXCSUM) - return 1; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum) - return 0; - return dev->ethtool_ops->get_rx_csum(dev); -} - -static inline u32 dev_ethtool_get_flags(struct net_device *dev) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_flags) - return 0; - return dev->ethtool_ops->get_flags(dev); -} - /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* netdev_printk helpers, similar to dev_printk */ -- cgit v1.2.3 From a59e2ecb859f2ab03bb2e230709f8039472ad2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: split netdev features to separate header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move features definitions to separate header so that linux/skbuff.h won't need to include linux/netdevice.h after netdev_features_t is introduced. Signed-off-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 90 +++++++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 80 +----------------------------------- 2 files changed, 92 insertions(+), 78 deletions(-) create mode 100644 include/linux/netdev_features.h (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h new file mode 100644 index 000000000000..32640edf4d78 --- /dev/null +++ b/include/linux/netdev_features.h @@ -0,0 +1,90 @@ +/* + * Network device features. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_NETDEV_FEATURES_H +#define _LINUX_NETDEV_FEATURES_H + +/* Net device feature bits; if you change something, + * also update netdev_features_strings[] in ethtool.c */ + +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_GSO 2048 /* Enable software GSO. */ +#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ + /* do not use LLTX in new drivers */ +#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ +#define NETIF_F_LRO 32768 /* large receive offload */ + +/* the GSO_MASK reserves bits 16 through 23 */ +#define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ +#define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ +#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ +#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ +#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ +#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ +#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ +#define NETIF_F_LOOPBACK (1 << 31) /* Enable loopback */ + +/* Segmentation offload features */ +#define NETIF_F_GSO_SHIFT 16 +#define NETIF_F_GSO_MASK 0x00ff0000 +#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) +#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) +#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) +#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) +#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) +#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) + +/* Features valid for ethtool to change */ +/* = all defined minus driver/device-class-related */ +#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ + NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) +#define NETIF_F_ETHTOOL_BITS (0xff3fffff & ~NETIF_F_NEVER_CHANGE) + +/* List of features with software fallbacks. */ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +#define NETIF_F_GEN_CSUM (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM) +#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) +#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) + +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ + NETIF_F_FSO) + +/* + * If one device supports one of these features, then enable them + * for all in netdev_increment_features. + */ +#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) +/* + * If one device doesn't support one of these features, then disable it + * for all in netdev_increment_features. + */ +#define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) + +/* changeable features with no special hardware requirements */ +#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) + +#endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e34717a792b4..9cf6e90b171d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,8 @@ #include #endif +#include + struct vlan_group; struct netpoll_info; struct phy_device; @@ -1005,84 +1007,6 @@ struct net_device { /* mask of features inheritable by VLAN devices */ u32 vlan_features; - /* Net device feature bits; if you change something, - * also update netdev_features_strings[] in ethtool.c */ - -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_GSO 2048 /* Enable software GSO. */ -#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ -#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ -#define NETIF_F_GRO 16384 /* Generic receive offload */ -#define NETIF_F_LRO 32768 /* large receive offload */ - -/* the GSO_MASK reserves bits 16 through 23 */ -#define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ -#define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ -#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ -#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ -#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ -#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ -#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ -#define NETIF_F_LOOPBACK (1 << 31) /* Enable loopback */ - - /* Segmentation offload features */ -#define NETIF_F_GSO_SHIFT 16 -#define NETIF_F_GSO_MASK 0x00ff0000 -#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) -#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) -#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) -#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) - - /* Features valid for ethtool to change */ - /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0xff3fffff & ~NETIF_F_NEVER_CHANGE) - - /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ - NETIF_F_TSO6 | NETIF_F_UFO) - - -#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) -#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) - -#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) - -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ - NETIF_F_FSO) - - /* - * If one device supports one of these features, then enable them - * for all in netdev_increment_features. - */ -#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) - /* - * If one device doesn't support one of these features, then disable it - * for all in netdev_increment_features. - */ -#define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) - - /* changeable features with no special hardware requirements */ -#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) - /* Interface index. Unique device identifier */ int ifindex; int iflink; -- cgit v1.2.3 From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: introduce and use netdev_features_t for device features sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: add couple missing conversions in drivers split unexporting netdev_fix_features() implemented %pNF convert sock::sk_route_(no?)caps Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 4 ++++ include/linux/netdevice.h | 41 +++++++++++++++++++++++------------------ include/linux/skbuff.h | 4 +++- 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 32640edf4d78..af5238121826 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -10,6 +10,10 @@ #ifndef _LINUX_NETDEV_FEATURES_H #define _LINUX_NETDEV_FEATURES_H +#include + +typedef u32 netdev_features_t; + /* Net device feature bits; if you change something, * also update netdev_features_strings[] in ethtool.c */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cf6e90b171d..b35ffd735ecc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -847,12 +847,13 @@ struct netdev_tc_txq { * Called to release previously enslaved netdev. * * Feature/offload setting functions. - * u32 (*ndo_fix_features)(struct net_device *dev, u32 features); + * netdev_features_t (*ndo_fix_features)(struct net_device *dev, + * netdev_features_t features); * Adjusts the requested feature flags according to device-specific * constraints, and returns the resulting flags. Must not modify * the device state. * - * int (*ndo_set_features)(struct net_device *dev, u32 features); + * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). * Must return >0 or -errno if it changed dev->features itself. @@ -946,10 +947,10 @@ struct net_device_ops { struct net_device *slave_dev); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); - u32 (*ndo_fix_features)(struct net_device *dev, - u32 features); + netdev_features_t (*ndo_fix_features)(struct net_device *dev, + netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, - u32 features); + netdev_features_t features); }; /* @@ -999,13 +1000,13 @@ struct net_device { struct list_head unreg_list; /* currently active device features */ - u32 features; + netdev_features_t features; /* user-changeable features */ - u32 hw_features; + netdev_features_t hw_features; /* user-requested features */ - u32 wanted_features; + netdev_features_t wanted_features; /* mask of features inheritable by VLAN devices */ - u32 vlan_features; + netdev_features_t vlan_features; /* Interface index. Unique device identifier */ int ifindex; @@ -1439,7 +1440,7 @@ struct packet_type { struct packet_type *, struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, - u32 features); + netdev_features_t features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); @@ -2444,7 +2445,8 @@ extern int netdev_set_master(struct net_device *dev, struct net_device *master) extern int netdev_set_bond_master(struct net_device *dev, struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features); +extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, + netdev_features_t features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); #else @@ -2471,11 +2473,13 @@ extern const char *netdev_drivername(const struct net_device *dev); extern void linkwatch_run_queue(void); -static inline u32 netdev_get_wanted_features(struct net_device *dev) +static inline netdev_features_t netdev_get_wanted_features( + struct net_device *dev) { return (dev->features & ~dev->hw_features) | dev->wanted_features; } -u32 netdev_increment_features(u32 all, u32 one, u32 mask); +netdev_features_t netdev_increment_features(netdev_features_t all, + netdev_features_t one, netdev_features_t mask); int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netdev_change_features(struct net_device *dev); @@ -2483,21 +2487,22 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -u32 netif_skb_features(struct sk_buff *skb); +netdev_features_t netif_skb_features(struct sk_buff *skb); -static inline int net_gso_ok(u32 features, int gso_type) +static inline int net_gso_ok(netdev_features_t features, int gso_type) { - int feature = gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; return (features & feature) == feature; } -static inline int skb_gso_ok(struct sk_buff *skb, u32 features) +static inline int skb_gso_ok(struct sk_buff *skb, netdev_features_t features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline int netif_needs_gso(struct sk_buff *skb, int features) +static inline int netif_needs_gso(struct sk_buff *skb, + netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abad8a0941e8..a10e487c0864 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -30,6 +30,7 @@ #include #include #include +#include /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 @@ -2106,7 +2107,8 @@ extern void skb_split(struct sk_buff *skb, extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); -extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features); +extern struct sk_buff *skb_segment(struct sk_buff *skb, + netdev_features_t features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) -- cgit v1.2.3 From a19f2a6df28e0ccb4103b77cc17c03b62f4d573e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: Define enum for net device features. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define feature values by bit position instead of direct 2**i values and force the values to be of type netdev_features_t. Cleaned and extended from patch by Mahesh Bandewar : + added netdev_features_t casts + included bits under NETIF_F_GSO_MASK + moved feature #defines out of struct net_device definition Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 133 ++++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index af5238121826..04ac8f8433e9 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -14,52 +14,105 @@ typedef u32 netdev_features_t; -/* Net device feature bits; if you change something, - * also update netdev_features_strings[] in ethtool.c */ - -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_GSO 2048 /* Enable software GSO. */ -#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ +enum { + NETIF_F_SG_BIT, /* Scatter/gather IO. */ + NETIF_F_IP_CSUM_BIT, /* Can checksum TCP/UDP over IPv4. */ + NETIF_F_NO_CSUM_BIT, /* Does not require checksum. F.e. loopack. */ + NETIF_F_HW_CSUM_BIT, /* Can checksum all the packets. */ + NETIF_F_IPV6_CSUM_BIT, /* Can checksum TCP/UDP over IPV6 */ + NETIF_F_HIGHDMA_BIT, /* Can DMA to high memory. */ + NETIF_F_FRAGLIST_BIT, /* Scatter/gather IO. */ + NETIF_F_HW_VLAN_TX_BIT, /* Transmit VLAN hw acceleration */ + NETIF_F_HW_VLAN_RX_BIT, /* Receive VLAN hw acceleration */ + NETIF_F_HW_VLAN_FILTER_BIT, /* Receive filtering on VLAN */ + NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ + NETIF_F_GSO_BIT, /* Enable software GSO. */ + NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ -#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ -#define NETIF_F_GRO 16384 /* Generic receive offload */ -#define NETIF_F_LRO 32768 /* large receive offload */ - -/* the GSO_MASK reserves bits 16 through 23 */ -#define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ -#define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ -#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ -#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ -#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ -#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ -#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ -#define NETIF_F_LOOPBACK (1 << 31) /* Enable loopback */ - -/* Segmentation offload features */ -#define NETIF_F_GSO_SHIFT 16 -#define NETIF_F_GSO_MASK 0x00ff0000 -#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) -#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) -#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) -#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) -#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) + NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ + NETIF_F_GRO_BIT, /* Generic receive offload */ + NETIF_F_LRO_BIT, /* large receive offload */ + + /**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */ + NETIF_F_TSO_BIT /* ... TCPv4 segmentation */ + = NETIF_F_GSO_SHIFT, + NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ + NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ + NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ + NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ + NETIF_F_FSO_BIT, /* ... FCoE segmentation */ + NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */ + /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ + NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ + = NETIF_F_GSO_LAST, + + NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ + NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ + NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ + NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ + NETIF_F_RXHASH_BIT, /* Receive hashing offload */ + NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ + NETIF_F_NOCACHE_COPY_BIT, /* Use no-cache copyfromuser */ + NETIF_F_LOOPBACK_BIT, /* Enable loopback */ + + /* + * Add your fresh new feature above and remember to update + * netdev_features_strings[] in net/core/ethtool.c and maybe + * some feature mask #defines below. Please also describe it + * in Documentation/networking/netdev-features.txt. + */ + + /**/NETDEV_FEATURE_COUNT +}; + +/* copy'n'paste compression ;) */ +#define __NETIF_F_BIT(bit) ((netdev_features_t)1 << (bit)) +#define __NETIF_F(name) __NETIF_F_BIT(NETIF_F_##name##_BIT) + +#define NETIF_F_FCOE_CRC __NETIF_F(FCOE_CRC) +#define NETIF_F_FCOE_MTU __NETIF_F(FCOE_MTU) +#define NETIF_F_FRAGLIST __NETIF_F(FRAGLIST) +#define NETIF_F_FSO __NETIF_F(FSO) +#define NETIF_F_GRO __NETIF_F(GRO) +#define NETIF_F_GSO __NETIF_F(GSO) +#define NETIF_F_GSO_ROBUST __NETIF_F(GSO_ROBUST) +#define NETIF_F_HIGHDMA __NETIF_F(HIGHDMA) +#define NETIF_F_HW_CSUM __NETIF_F(HW_CSUM) +#define NETIF_F_HW_VLAN_FILTER __NETIF_F(HW_VLAN_FILTER) +#define NETIF_F_HW_VLAN_RX __NETIF_F(HW_VLAN_RX) +#define NETIF_F_HW_VLAN_TX __NETIF_F(HW_VLAN_TX) +#define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) +#define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) +#define NETIF_F_LLTX __NETIF_F(LLTX) +#define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) +#define NETIF_F_LRO __NETIF_F(LRO) +#define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) +#define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) +#define NETIF_F_NO_CSUM __NETIF_F(NO_CSUM) +#define NETIF_F_NTUPLE __NETIF_F(NTUPLE) +#define NETIF_F_RXCSUM __NETIF_F(RXCSUM) +#define NETIF_F_RXHASH __NETIF_F(RXHASH) +#define NETIF_F_SCTP_CSUM __NETIF_F(SCTP_CSUM) +#define NETIF_F_SG __NETIF_F(SG) +#define NETIF_F_TSO6 __NETIF_F(TSO6) +#define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) +#define NETIF_F_TSO __NETIF_F(TSO) +#define NETIF_F_UFO __NETIF_F(UFO) +#define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0xff3fffff & ~NETIF_F_NEVER_CHANGE) + +/* remember that ((t)1 << t_BITS) is undefined in C99 */ +#define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ + (__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) - 1)) & \ + ~NETIF_F_NEVER_CHANGE) + +/* Segmentation offload feature mask */ +#define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ + __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ -- cgit v1.2.3 From a861a8b233e9024303fb8e73e465e81ad7119d5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: extend netdev_features_t to 64 bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 04ac8f8433e9..20e3a1f9892d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -12,7 +12,7 @@ #include -typedef u32 netdev_features_t; +typedef u64 netdev_features_t; enum { NETIF_F_SG_BIT, /* Scatter/gather IO. */ -- cgit v1.2.3 From 34324dc2bf27c1773045fea63cb11f7e2a6ad2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: remove NETIF_F_NO_CSUM feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only distinct use is checking if NETIF_F_NOCACHE_COPY should be enabled by default. The check heuristics is altered a bit here, so it hits other people than before. The default shouldn't be trusted for performance-critical cases anyway. For all other uses NETIF_F_NO_CSUM is equivalent to NETIF_F_HW_CSUM. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 5 ++--- include/linux/skbuff.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 20e3a1f9892d..77f5202977ce 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -17,7 +17,7 @@ typedef u64 netdev_features_t; enum { NETIF_F_SG_BIT, /* Scatter/gather IO. */ NETIF_F_IP_CSUM_BIT, /* Can checksum TCP/UDP over IPv4. */ - NETIF_F_NO_CSUM_BIT, /* Does not require checksum. F.e. loopack. */ + __UNUSED_NETIF_F_1, NETIF_F_HW_CSUM_BIT, /* Can checksum all the packets. */ NETIF_F_IPV6_CSUM_BIT, /* Can checksum TCP/UDP over IPV6 */ NETIF_F_HIGHDMA_BIT, /* Can DMA to high memory. */ @@ -88,7 +88,6 @@ enum { #define NETIF_F_LRO __NETIF_F(LRO) #define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) #define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) -#define NETIF_F_NO_CSUM __NETIF_F(NO_CSUM) #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) #define NETIF_F_RXHASH __NETIF_F(RXHASH) @@ -118,7 +117,7 @@ enum { #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) -#define NETIF_F_GEN_CSUM (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM) +#define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM #define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a10e487c0864..b93117389cfe 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -88,7 +88,6 @@ * at device setup time. * NETIF_F_HW_CSUM - it is clever device, it is able to checksum * everything. - * NETIF_F_NO_CSUM - loopback or reliable single hop media. * NETIF_F_IP_CSUM - device is dumb. It is able to csum only * TCP/UDP over IPv4. Sigh. Vendors like this * way by an unknown reason. Though, see comment above -- cgit v1.2.3 From 61dc3461b9549bc10a2f16d254250680cadafcce Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 16 Nov 2011 11:09:08 +0000 Subject: team: convert overall spinlock to mutex No need to have spinlock for this purpose. So convert this to mutex and avoid current schedule while atomic problems in netlink code. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_team.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 14f6388f5460..a6eac126a99a 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -92,7 +92,7 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; - spinlock_t lock; /* used for overall locking, e.g. port lists write */ + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* * port lists with port count -- cgit v1.2.3 From 358b838291f618278080bbed435b755f9b46748e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 16 Nov 2011 11:09:09 +0000 Subject: team: replicate options on register Since multiple team instances are putting defined options into their option list, during register each option must be cloned before added into list. This resolves uncool memory corruptions when using multiple teams. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_team.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a6eac126a99a..828181fbad5d 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -140,11 +140,11 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, } extern int team_port_set_team_mac(struct team_port *port); -extern void team_options_register(struct team *team, - struct team_option *option, - size_t option_count); +extern int team_options_register(struct team *team, + const struct team_option *option, + size_t option_count); extern void team_options_unregister(struct team *team, - struct team_option *option, + const struct team_option *option, size_t option_count); extern int team_mode_register(struct team_mode *mode); extern int team_mode_unregister(struct team_mode *mode); -- cgit v1.2.3 From 28011cf19b75df9d3f35489a7599a97ec0b3f1a0 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Wed, 16 Nov 2011 18:36:59 -0500 Subject: net: Add ethtool to mii advertisment conversion helpers Translating between ethtool advertisement settings and MII advertisements are common operations for ethernet drivers. This patch adds a set of helper functions that implements the conversion. The patch then modifies a couple of the drivers to use the new functions. Signed-off-by: Matt Carlson Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/mii.h | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 27748230aa69..6697b9112014 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -9,6 +9,7 @@ #define __LINUX_MII_H__ #include +#include /* Generic MII registers. */ #define MII_BMCR 0x00 /* Basic mode control register */ @@ -239,6 +240,171 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, return 0; } +/** + * ethtool_adv_to_mii_100bt + * @ethadv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement + * settings to phy autonegotiation advertisements for the + * MII_ADVERTISE register. + */ +static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv) +{ + u32 result = 0; + + if (ethadv & ADVERTISED_10baseT_Half) + result |= ADVERTISE_10HALF; + if (ethadv & ADVERTISED_10baseT_Full) + result |= ADVERTISE_10FULL; + if (ethadv & ADVERTISED_100baseT_Half) + result |= ADVERTISE_100HALF; + if (ethadv & ADVERTISED_100baseT_Full) + result |= ADVERTISE_100FULL; + if (ethadv & ADVERTISED_Pause) + result |= ADVERTISE_PAUSE_CAP; + if (ethadv & ADVERTISED_Asym_Pause) + result |= ADVERTISE_PAUSE_ASYM; + + return result; +} + +/** + * mii_adv_to_ethtool_100bt + * @adv: value of the MII_ADVERTISE register + * + * A small helper function that translates MII_ADVERTISE bits + * to ethtool advertisement settings. + */ +static inline u32 mii_adv_to_ethtool_100bt(u32 adv) +{ + u32 result = 0; + + if (adv & ADVERTISE_10HALF) + result |= ADVERTISED_10baseT_Half; + if (adv & ADVERTISE_10FULL) + result |= ADVERTISED_10baseT_Full; + if (adv & ADVERTISE_100HALF) + result |= ADVERTISED_100baseT_Half; + if (adv & ADVERTISE_100FULL) + result |= ADVERTISED_100baseT_Full; + if (adv & ADVERTISE_PAUSE_CAP) + result |= ADVERTISED_Pause; + if (adv & ADVERTISE_PAUSE_ASYM) + result |= ADVERTISED_Asym_Pause; + + return result; +} + +/** + * ethtool_adv_to_mii_1000T + * @ethadv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement + * settings to phy autonegotiation advertisements for the + * MII_CTRL1000 register when in 1000T mode. + */ +static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv) +{ + u32 result = 0; + + if (ethadv & ADVERTISED_1000baseT_Half) + result |= ADVERTISE_1000HALF; + if (ethadv & ADVERTISED_1000baseT_Full) + result |= ADVERTISE_1000FULL; + + return result; +} + +/** + * mii_adv_to_ethtool_1000T + * @adv: value of the MII_CTRL1000 register + * + * A small helper function that translates MII_CTRL1000 + * bits, when in 1000Base-T mode, to ethtool + * advertisement settings. + */ +static inline u32 mii_adv_to_ethtool_1000T(u32 adv) +{ + u32 result = 0; + + if (adv & ADVERTISE_1000HALF) + result |= ADVERTISED_1000baseT_Half; + if (adv & ADVERTISE_1000FULL) + result |= ADVERTISED_1000baseT_Full; + + return result; +} + +#define mii_lpa_to_ethtool_100bt(lpa) mii_adv_to_ethtool_100bt(lpa) + +/** + * mii_lpa_to_ethtool_1000T + * @adv: value of the MII_STAT1000 register + * + * A small helper function that translates MII_STAT1000 + * bits, when in 1000Base-T mode, to ethtool + * advertisement settings. + */ +static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa) +{ + u32 result = 0; + + if (lpa & LPA_1000HALF) + result |= ADVERTISED_1000baseT_Half; + if (lpa & LPA_1000FULL) + result |= ADVERTISED_1000baseT_Full; + + return result; +} + +/** + * ethtool_adv_to_mii_1000X + * @ethadv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement + * settings to phy autonegotiation advertisements for the + * MII_CTRL1000 register when in 1000Base-X mode. + */ +static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv) +{ + u32 result = 0; + + if (ethadv & ADVERTISED_1000baseT_Half) + result |= ADVERTISE_1000XHALF; + if (ethadv & ADVERTISED_1000baseT_Full) + result |= ADVERTISE_1000XFULL; + if (ethadv & ADVERTISED_Pause) + result |= ADVERTISE_1000XPAUSE; + if (ethadv & ADVERTISED_Asym_Pause) + result |= ADVERTISE_1000XPSE_ASYM; + + return result; +} + +/** + * mii_adv_to_ethtool_1000X + * @adv: value of the MII_CTRL1000 register + * + * A small helper function that translates MII_CTRL1000 + * bits, when in 1000Base-X mode, to ethtool + * advertisement settings. + */ +static inline u32 mii_adv_to_ethtool_1000X(u32 adv) +{ + u32 result = 0; + + if (adv & ADVERTISE_1000XHALF) + result |= ADVERTISED_1000baseT_Half; + if (adv & ADVERTISE_1000XFULL) + result |= ADVERTISED_1000baseT_Full; + if (adv & ADVERTISE_1000XPAUSE) + result |= ADVERTISED_Pause; + if (adv & ADVERTISE_1000XPSE_ASYM) + result |= ADVERTISED_Asym_Pause; + + return result; +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From ea441d1104cf1efb471fa81bc91e9fd1e6ae29fd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Nov 2011 21:43:59 -0500 Subject: new helper: mount_subtree() takes vfsmount and relative path, does lookup within that vfsmount (possibly triggering automounts) and returns the result as root of subtree suitable for return by ->mount() (i.e. a reference to dentry and an active reference to its superblock grabbed, superblock locked exclusive). btrfs and nfs switched to it instead of open-coding the sucker. Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0c4df261af7e..e3130220ce3e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1886,6 +1886,7 @@ extern struct dentry *mount_single(struct file_system_type *fs_type, extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); -- cgit v1.2.3 From 0345e1864283207bc236120dd3e13ff2391fa85f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 16 Nov 2011 14:05:33 +0000 Subject: net: verify GSO flag bits against netdev features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b35ffd735ecc..31da3bbe7b1b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2492,6 +2492,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb); static inline int net_gso_ok(netdev_features_t features, int gso_type) { netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; + + /* check flags correspondence */ + BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); + return (features & feature) == feature; } -- cgit v1.2.3 From ccf5ff69fbbd8d877377f5786369cf5aa78a15fc Mon Sep 17 00:00:00 2001 From: david decotigny Date: Wed, 16 Nov 2011 12:15:10 +0000 Subject: net: new counter for tx_timeout errors in sysfs This adds the /sys/class/net/DEV/queues/Q/tx_timeout attribute containing the total number of timeout events on the given queue. It is always available with CONFIG_SYSFS, independently of CONFIG_RPS/XPS. Credits to Stephen Hemminger for a preliminary version of this patch. Tested: without CONFIG_SYSFS (compilation only) with sysfs and without CONFIG_RPS & CONFIG_XPS with sysfs and without CONFIG_RPS with sysfs and without CONFIG_XPS with defaults Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 31da3bbe7b1b..4d5698aa828b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -532,7 +532,7 @@ struct netdev_queue { struct Qdisc *qdisc; unsigned long state; struct Qdisc *qdisc_sleeping; -#if defined(CONFIG_RPS) || defined(CONFIG_XPS) +#ifdef CONFIG_SYSFS struct kobject kobj; #endif #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) @@ -547,6 +547,12 @@ struct netdev_queue { * please use this field instead of dev->trans_start */ unsigned long trans_start; + + /* + * Number of TX timeouts for this queue + * (/sys/class/net/DEV/Q/trans_timeout) + */ + unsigned long trans_timeout; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1109,9 +1115,11 @@ struct net_device { unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ -#if defined(CONFIG_RPS) || defined(CONFIG_XPS) +#ifdef CONFIG_SYSFS struct kset *queues_kset; +#endif +#ifdef CONFIG_RPS struct netdev_rx_queue *_rx; /* Number of RX queues allocated at register_netdev() time */ -- cgit v1.2.3 From 029632fbb7b7c9d85063cc9eb470de6c54873df3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2011 10:00:11 +0200 Subject: sched: Make separate sched*.c translation units Since once needs to do something at conferences and fixing compile warnings doesn't actually require much if any attention I decided to break up the sched.c #include "*.c" fest. This further modularizes the scheduler code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-x0fcd3mnp8f9c99grcpewmhi@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/latencytop.h | 3 ++- include/linux/sched.h | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index b0e99898527c..e23121f9d82a 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -10,6 +10,8 @@ #define _INCLUDE_GUARD_LATENCYTOP_H_ #include +struct task_struct; + #ifdef CONFIG_LATENCYTOP #define LT_SAVECOUNT 32 @@ -23,7 +25,6 @@ struct latency_record { }; -struct task_struct; extern int latencytop_enabled; void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2c..8db17b7622ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -925,6 +925,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/** + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. + * @group: The group whose first cpu is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_cpus(group)); +} + struct sched_domain_attr { int relax_domain_level; }; -- cgit v1.2.3 From 468e6a20afaccb67e2a7d7f60d301f90e1c6f301 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 7 Sep 2011 10:41:32 -0600 Subject: writeback: remove vm_dirties and task->dirties They are not used any more. Signed-off-by: Wu Fengguang --- include/linux/init_task.h | 1 - include/linux/sched.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08ffab01e76c..94b1e356c02a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -184,7 +184,6 @@ extern struct cred init_cred; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ - .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2c..1c4f3e9b9bc5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1521,7 +1521,6 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif - struct prop_local_single dirties; /* * when (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for some dirty throttling pause -- cgit v1.2.3 From bb75c627fb0dfb8c0ab75d3033709ff928896e16 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 17 Nov 2011 15:26:35 +0100 Subject: Revert "KVM: PPC: Add support for explicit HIOR setting" This reverts commit a15bd354f083f20f257db450488db52ac27df439. It exceeded the padding on the SREGS struct, rendering the ABI backwards-incompatible. Conflicts: arch/powerpc/kvm/powerpc.c include/linux/kvm.h Signed-off-by: Avi Kivity --- include/linux/kvm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f47fcd30273d..c3892fc1d538 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -555,7 +555,6 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_SMT 64 #define KVM_CAP_PPC_RMA 65 #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ -#define KVM_CAP_PPC_HIOR 67 #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_S390_GMAP 71 -- cgit v1.2.3 From 907d0ed1c84114d4e8dafd66af982515d3739c90 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 16 Nov 2011 10:13:35 +0100 Subject: drivercore: Generalize module_platform_driver This patch generalizes the module_platform_driver macro and introduces a new module_driver macro. The module_driver macro takes a driver name, a register and a unregister function for this driver type. Using these it construct the module init and exit sections which register and unregister the driver. Since such init/exit sections are commonly found in drivers this macro can be used to eliminate a lot of boilerplate code. The macro is not intended to be used by driver modules directly, instead it should be used to generate bus specific macros for registering drivers like the module_platform_driver macro. Signed-off-by: Lars-Peter Clausen Acked-by: Grant Likely Acked-by: Jonathan Cameron Acked-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 21 +++++++++++++++++++++ include/linux/platform_device.h | 12 ++---------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c6335982774c..341fb740d851 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -922,4 +922,25 @@ extern long sysfs_deprecated; #define sysfs_deprecated 0 #endif +/** + * module_driver() - Helper macro for drivers that don't do anything + * special in module init/exit. This eliminates a lot of boilerplate. + * Each module may only use this macro once, and calling it replaces + * module_init() and module_exit(). + * + * Use this macro to construct bus specific macros for registering + * drivers, and do not use it on its own. + */ +#define module_driver(__driver, __register, __unregister) \ +static int __init __driver##_init(void) \ +{ \ + return __register(&(__driver)); \ +} \ +module_init(__driver##_init); \ +static void __exit __driver##_exit(void) \ +{ \ + __unregister(&(__driver)); \ +} \ +module_exit(__driver##_exit); + #endif /* _DEVICE_H_ */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 2a23f7d1a825..165a8d175370 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -196,16 +196,8 @@ static inline void platform_set_drvdata(struct platform_device *pdev, void *data * calling it replaces module_init() and module_exit() */ #define module_platform_driver(__platform_driver) \ -static int __init __platform_driver##_init(void) \ -{ \ - return platform_driver_register(&(__platform_driver)); \ -} \ -module_init(__platform_driver##_init); \ -static void __exit __platform_driver##_exit(void) \ -{ \ - platform_driver_unregister(&(__platform_driver)); \ -} \ -module_exit(__platform_driver##_exit); + module_driver(__platform_driver, platform_driver_register, \ + platform_driver_unregister) extern struct platform_device *platform_create_bundle(struct platform_driver *driver, int (*probe)(struct platform_device *), -- cgit v1.2.3 From 7c92784a546d2945b6d6973a30f7134be78eb7a4 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 16 Nov 2011 10:13:36 +0100 Subject: I2C: Add helper macro for i2c_driver boilerplate This patch introduces the module_i2c_driver macro which is a convenience macro for I2C driver modules similar to module_platform_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the I2C driver. By using this macro it is possible to eliminate a few lines of boilerplate code per I2C driver. Signed-off-by: Lars-Peter Clausen Acked-by: Grant Likely Acked-by: Jonathan Cameron Acked-by: Wolfram Sang Acked-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a81bf6d23b3e..7e92854fe9cc 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -485,6 +485,19 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) { return adap->nr; } + +/** + * module_i2c_driver() - Helper macro for registering a I2C driver + * @__i2c_driver: i2c_driver struct + * + * Helper macro for I2C drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_i2c_driver(__i2c_driver) \ + module_driver(__i2c_driver, i2c_add_driver, \ + i2c_del_driver) + #endif /* I2C */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 3acbb0142d48713a8f65cde678a54f419801c189 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 16 Nov 2011 10:13:37 +0100 Subject: SPI: Add helper macro for spi_driver boilerplate This patch introduces the module_spi_driver macro which is a convenience macro for SPI driver modules similar to module_platform_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the SPI driver. By using this macro it is possible to eliminate a few lines of boilerplate code per SPI driver. Signed-off-by: Lars-Peter Clausen Acked-by: Grant Likely Acked-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- include/linux/spi/spi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index bb4f5fbbbd8e..176fce9cc6b1 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -200,6 +200,17 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) driver_unregister(&sdrv->driver); } +/** + * module_spi_driver() - Helper macro for registering a SPI driver + * @__spi_driver: spi_driver struct + * + * Helper macro for SPI drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_spi_driver(__spi_driver) \ + module_driver(__spi_driver, spi_register_driver, \ + spi_unregister_driver) /** * struct spi_master - interface to SPI master controller -- cgit v1.2.3 From 8b258cc8ac229aa7d5dcb7cc34cb35d9124498ac Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Nov 2011 21:39:33 +0100 Subject: PM Sleep: Do not extend wakeup paths to devices with ignore_children set Commit 4ca46ff3e0d8c234cb40ebb6457653b59584426c (PM / Sleep: Mark devices involved in wakeup signaling during suspend) introduced the power.wakeup_path field in struct dev_pm_info to mark devices whose children are enabled to wake up the system from sleep states, so that power domains containing the parents that provide their children with wakeup power and/or relay their wakeup signals are not turned off. Unfortunately, that introduced a PM regression on SH7372 whose power consumption in the system "memory sleep" state increased as a result of it, because it prevented the power domain containing the I2C controller from being turned off when some children of that controller were enabled to wake up the system, although the controller was not necessary for them to signal wakeup. To fix this issue use the observation that devices whose power.ignore_children flag is set for runtime PM should be treated analogously during system suspend. Namely, they shouldn't be included in wakeup paths going through their children. Since the SH7372 I2C controller's power.ignore_children flag is set, doing so will restore the previous behavior of that SOC. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++++ include/linux/pm.h | 2 +- include/linux/pm_runtime.h | 6 ------ 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ffbcf95cd97d..52b3a4111df9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -682,6 +682,11 @@ static inline bool device_async_suspend_enabled(struct device *dev) return !!dev->power.async_suspend; } +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) +{ + dev->power.ignore_children = enable; +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); diff --git a/include/linux/pm.h b/include/linux/pm.h index f15acb646813..5c4c8b18c8b7 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -447,6 +447,7 @@ struct dev_pm_info { unsigned int async_suspend:1; bool is_prepared:1; /* Owned by the PM core */ bool is_suspended:1; /* Ditto */ + bool ignore_children:1; spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; @@ -464,7 +465,6 @@ struct dev_pm_info { atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; - unsigned int ignore_children:1; unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d8d903619642..d3085e72a0ee 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -52,11 +52,6 @@ static inline bool pm_children_suspended(struct device *dev) || !atomic_read(&dev->power.child_count); } -static inline void pm_suspend_ignore_children(struct device *dev, bool enable) -{ - dev->power.ignore_children = enable; -} - static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); @@ -130,7 +125,6 @@ static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline bool pm_children_suspended(struct device *dev) { return false; } -static inline void pm_suspend_ignore_children(struct device *dev, bool en) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } -- cgit v1.2.3 From f6f8285132907757ef84ef8dae0a1244b8cde6ac Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2011 12:58:07 -0800 Subject: pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook Signed-off-by: Tony Luck --- include/linux/pstore.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index ea567321ae3c..2ca8cde5459d 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,10 +35,12 @@ struct pstore_info { spinlock_t buf_lock; /* serialize access to 'buf' */ char *buf; size_t bufsize; + struct mutex read_mutex; /* serialize open/read/close */ int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi); + struct timespec *time, char **buf, + struct pstore_info *psi); int (*write)(enum pstore_type_id type, u64 *id, unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, -- cgit v1.2.3 From 3d6d8d20ec4fd3b256632edb373a9c504724b8a9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2011 13:13:29 -0800 Subject: pstore: pass reason to backend write callback This allows a backend to filter on the dmesg reason as well as the pstore reason. When ramoops is switched to pstore, this is needed since it has no interest in storing non-crash dmesg details. Drop pstore_write() as it has no users, and handling the "reason" here has no obviously correct value. Signed-off-by: Kees Cook Signed-off-by: Tony Luck --- include/linux/pstore.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 2ca8cde5459d..e1461e143be2 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -22,6 +22,9 @@ #ifndef _LINUX_PSTORE_H #define _LINUX_PSTORE_H +#include +#include + /* types */ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, @@ -41,7 +44,8 @@ struct pstore_info { ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, char **buf, struct pstore_info *psi); - int (*write)(enum pstore_type_id type, u64 *id, + int (*write)(enum pstore_type_id type, + enum kmsg_dump_reason reason, u64 *id, unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); @@ -50,18 +54,12 @@ struct pstore_info { #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); -extern int pstore_write(enum pstore_type_id type, char *buf, size_t size); #else static inline int pstore_register(struct pstore_info *psi) { return -ENODEV; } -static inline int -pstore_write(enum pstore_type_id type, char *buf, size_t size) -{ - return -ENODEV; -} #endif #endif /*_LINUX_PSTORE_H*/ -- cgit v1.2.3 From adc9300e78e6091a7eaa1821213836379d4dbaa8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Nov 2011 03:13:26 +0000 Subject: net: use jump_label to shortcut RPS if not setup Most machines dont use RPS/RFS, and pay a fair amount of instructions in netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover RPS/RFS is not setup. Add a jump_label named rps_needed. If no device rps_map or global rps_sock_flow_table is setup, netif_receive_skb() / netif_rx() do a single instruction instead of many ones, including conditional jumps. jmp +0 (if CONFIG_JUMP_LABEL=y) Signed-off-by: Eric Dumazet CC: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d5698aa828b..0bbe030fc014 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -214,6 +214,11 @@ enum { #include #include +#ifdef CONFIG_RPS +#include +extern struct jump_label_key rps_needed; +#endif + struct neighbour; struct neigh_parms; struct sk_buff; -- cgit v1.2.3 From f3a6a4b6cfc80e57bf16bb12f9425bec1a5731a9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 17 Nov 2011 14:38:33 -0800 Subject: USB: Add helper macro for usb_driver boilerplate This patch introduces the module_usb_driver macro which is a convenience macro for USB driver modules similar to module_platform_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the USB driver. By using this macro it is possible to eliminate a few lines of boilerplate code per USB driver. Based on work done by Lars-Peter Clausen for other busses (i2c and spi). Cc: Lars-Peter Clausen Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index d3d0c1374334..5d258c3180ae 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -953,6 +953,18 @@ extern int usb_register_driver(struct usb_driver *, struct module *, extern void usb_deregister(struct usb_driver *); +/** + * module_usb_driver() - Helper macro for registering a USB driver + * @__usb_driver: usb_driver struct + * + * Helper macro for USB drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_usb_driver(__usb_driver) \ + module_driver(__usb_driver, usb_register, \ + usb_deregister) + extern int usb_register_device_driver(struct usb_device_driver *, struct module *); extern void usb_deregister_device_driver(struct usb_device_driver *); -- cgit v1.2.3 From 1a087c6ad975bcc193b4bab2e9d61f9c6c547138 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Fri, 18 Nov 2011 14:50:21 +0100 Subject: debugfs: add tools to printk 32-bit registers Some debugfs file I deal with are mostly blocks of registers, i.e. lines of the form " = 0x". Some files are only registers, some include registers blocks among other material. This patch introduces data structures and functions to deal with both cases. I expect more users of this over time. Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Cc: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e7d9b20ddc5b..5e6b01f6db4c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -16,6 +16,7 @@ #define _DEBUGFS_H_ #include +#include #include @@ -26,6 +27,17 @@ struct debugfs_blob_wrapper { unsigned long size; }; +struct debugfs_reg32 { + char *name; + unsigned long offset; +}; + +struct debugfs_regset32 { + struct debugfs_reg32 *regs; + int nregs; + void __iomem *base; +}; + extern struct dentry *arch_debugfs_dir; #if defined(CONFIG_DEBUG_FS) @@ -74,6 +86,13 @@ struct dentry *debugfs_create_blob(const char *name, mode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); +struct dentry *debugfs_create_regset32(const char *name, mode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset); + +int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, + int nregs, void __iomem *base, char *prefix); + bool debugfs_initialized(void); #else @@ -188,6 +207,13 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_regset32(const char *name, + mode_t mode, struct dentry *parent, + struct debugfs_regset32 *regset) +{ + return ERR_PTR(-ENODEV); +} + static inline bool debugfs_initialized(void) { return false; -- cgit v1.2.3 From be9b7335e70696bee731c152429b1737e42fe163 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 25 Aug 2011 00:24:21 -0400 Subject: mm: add vm_area_add_early() The existing vm_area_register_early() allows for early vmalloc space allocation. However upcoming cleanups in the ARM architecture require that some fixed locations in the vmalloc area be reserved also very early. The name "vm_area_register_early" would have been a good name for the reservation part without the allocation. Since it is already in use with different semantics, let's create vm_area_add_early() instead. Both vm_area_register_early() and vm_area_add_early() can be used together meaning that the former is now implemented using the later where it is ensured that no conflicting areas are added, but no attempt is made to make the allocation scheme in vm_area_register_early() more sophisticated. After all, you must know what you're doing when using those functions. Signed-off-by: Nicolas Pitre Acked-by: Andrew Morton Cc: linux-mm@kvack.org --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 687fb11e2010..4115d6aa80be 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -131,6 +131,7 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #ifdef CONFIG_SMP -- cgit v1.2.3 From 56c978f1da1f630ef18aa668a9748c6c23ab819b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Nov 2011 02:20:05 +0000 Subject: net: Remove LL_ALLOCATED_SPACE net: Remove LL_ALLOCATED_SPACE The macro LL_ALLOCATED_SPACE was ill-conceived. It applies the alignment to the sum of needed_headroom and needed_tailroom. As the amount that is then reserved for head room is needed_headroom with alignment, this means that the tail room left may be too small. Now that all uses of LL_ALLOCATED_SPACE have been removed, this patch finally removes the macro itself. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0bbe030fc014..3eb383a9b5ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -279,16 +279,11 @@ struct hh_cache { * * We could use other alignment values, but we must maintain the * relationship HH alignment <= LL alignment. - * - * LL_ALLOCATED_SPACE also takes into account the tailroom the device - * may need. */ #define LL_RESERVED_SPACE(dev) \ ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) -#define LL_ALLOCATED_SPACE(dev) \ - ((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From bdb6e697b2a76c541960b86ab8fda88f3de1adf2 Mon Sep 17 00:00:00 2001 From: Dinesh Kumar Sharma Date: Fri, 18 Nov 2011 01:22:05 +0000 Subject: Phonet: set the pipe handle using setsockopt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides flexibility to set the pipe handle using setsockopt. The pipe can be enabled (if disabled) later using ioctl. Signed-off-by: Hemant Ramdasi Signed-off-by: Dinesh Kumar Sharma Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index f53a4167c5f4..f48bfc80cb4b 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -38,6 +38,7 @@ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 #define PNPIPE_HANDLE 3 +#define PNPIPE_INITSTATE 4 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC @@ -49,6 +50,7 @@ /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) +#define SIOCPNENABLEPIPE (SIOCPROTOPRIVATE + 13) #define SIOCPNADDRESOURCE (SIOCPROTOPRIVATE + 14) #define SIOCPNDELRESOURCE (SIOCPROTOPRIVATE + 15) -- cgit v1.2.3 From 8ee4dd9f063ce59c08f3ce283ca03306131aaf3a Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Fri, 18 Nov 2011 23:53:29 +0100 Subject: debugfs: print_regs32: make regs array a const pointer Signed-off-by: Alessandro Rubini Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 5e6b01f6db4c..e8c3abc60811 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -90,7 +90,7 @@ struct dentry *debugfs_create_regset32(const char *name, mode_t mode, struct dentry *parent, struct debugfs_regset32 *regset); -int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, +int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); bool debugfs_initialized(void); -- cgit v1.2.3 From a5c86e986f0b2fe779f13cf53ce6e9f467b03950 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sat, 19 Nov 2011 02:33:57 -0800 Subject: hugetlb: remove dummy definitions of HPAGE_MASK and HPAGE_SIZE Dummy, non-zero definitions for HPAGE_MASK and HPAGE_SIZE were added in 51c6f666fceb ("mm: ZAP_BLOCK causes redundant work") to avoid a divide by zero in generic kernel code. That code has since been removed, but probably should never have been added in the first place: we don't want HPAGE_SIZE to act like PAGE_SIZE for code that is working with hugepages, for example, when the dependency on CONFIG_HUGETLB_PAGE has not been fulfilled. Because hugepage size can differ from architecture to architecture, each is required to have their own definitions for both HPAGE_MASK and HPAGE_SIZE. This is always done in arch/*/include/asm/page.h. So, just remove the dummy and dangerous definitions since they are no longer needed and reveals the correct dependencies. Tested on architectures using the definitions with allyesconfig: x86 (even with thp), hppa, mips, powerpc, s390, sh3, sh4, sparc, and sparc64, and with defconfig on ia64. Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 19644e0016bd..d9d6c868b86b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -110,11 +110,6 @@ static inline void copy_huge_page(struct page *dst, struct page *src) #define hugetlb_change_protection(vma, address, end, newprot) -#ifndef HPAGE_MASK -#define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ -#define HPAGE_SIZE PAGE_SIZE -#endif - #endif /* !CONFIG_HUGETLB_PAGE */ #define HUGETLB_ANON_FILE "anon_hugepage" -- cgit v1.2.3 From 4c691664583ef6a91f9ed0e08a75fbd30a5ffd5c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Nov 2011 16:53:00 +0000 Subject: regmap: Remove indexed cache type There should be no situation where it offers any advantage over rbtree and there are no current users so remove the code for simplicity. Signed-off-by: Mark Brown --- include/linux/regmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 458f15f4c37c..81dfe0acb20c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -23,7 +23,6 @@ struct spi_device; /* An enum of all the supported cache types */ enum regcache_type { REGCACHE_NONE, - REGCACHE_INDEXED, REGCACHE_RBTREE, REGCACHE_COMPRESSED }; -- cgit v1.2.3 From 37f07023d30708b5da091fe6d6be9b60783c6d82 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Thu, 17 Nov 2011 14:30:55 +0000 Subject: net: Change mii to ethtool advertisement function names This patch implements advice by Ben Hutchings to change the mii side of the function names to look more like the register whose values they convert. New LPA translation functions have been added as well. Signed-off-by: Matt Carlson Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/mii.h | 64 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 6697b9112014..2783eca629a0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -241,14 +241,14 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, } /** - * ethtool_adv_to_mii_100bt + * ethtool_adv_to_mii_adv_t * @ethadv: the ethtool advertisement settings * * A small helper function that translates ethtool advertisement * settings to phy autonegotiation advertisements for the * MII_ADVERTISE register. */ -static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv) +static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) { u32 result = 0; @@ -269,13 +269,13 @@ static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv) } /** - * mii_adv_to_ethtool_100bt + * mii_adv_to_ethtool_adv_t * @adv: value of the MII_ADVERTISE register * * A small helper function that translates MII_ADVERTISE bits * to ethtool advertisement settings. */ -static inline u32 mii_adv_to_ethtool_100bt(u32 adv) +static inline u32 mii_adv_to_ethtool_adv_t(u32 adv) { u32 result = 0; @@ -296,14 +296,14 @@ static inline u32 mii_adv_to_ethtool_100bt(u32 adv) } /** - * ethtool_adv_to_mii_1000T + * ethtool_adv_to_mii_ctrl1000_t * @ethadv: the ethtool advertisement settings * * A small helper function that translates ethtool advertisement * settings to phy autonegotiation advertisements for the * MII_CTRL1000 register when in 1000T mode. */ -static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv) +static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) { u32 result = 0; @@ -316,14 +316,14 @@ static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv) } /** - * mii_adv_to_ethtool_1000T + * mii_ctrl1000_to_ethtool_adv_t * @adv: value of the MII_CTRL1000 register * * A small helper function that translates MII_CTRL1000 * bits, when in 1000Base-T mode, to ethtool * advertisement settings. */ -static inline u32 mii_adv_to_ethtool_1000T(u32 adv) +static inline u32 mii_ctrl1000_to_ethtool_adv_t(u32 adv) { u32 result = 0; @@ -335,17 +335,33 @@ static inline u32 mii_adv_to_ethtool_1000T(u32 adv) return result; } -#define mii_lpa_to_ethtool_100bt(lpa) mii_adv_to_ethtool_100bt(lpa) +/** + * mii_lpa_to_ethtool_lpa_t + * @adv: value of the MII_LPA register + * + * A small helper function that translates MII_LPA + * bits, when in 1000Base-T mode, to ethtool + * LP advertisement settings. + */ +static inline u32 mii_lpa_to_ethtool_lpa_t(u32 lpa) +{ + u32 result = 0; + + if (lpa & LPA_LPACK) + result |= ADVERTISED_Autoneg; + + return result | mii_adv_to_ethtool_adv_t(lpa); +} /** - * mii_lpa_to_ethtool_1000T + * mii_stat1000_to_ethtool_lpa_t * @adv: value of the MII_STAT1000 register * * A small helper function that translates MII_STAT1000 * bits, when in 1000Base-T mode, to ethtool * advertisement settings. */ -static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa) +static inline u32 mii_stat1000_to_ethtool_lpa_t(u32 lpa) { u32 result = 0; @@ -358,14 +374,14 @@ static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa) } /** - * ethtool_adv_to_mii_1000X + * ethtool_adv_to_mii_adv_x * @ethadv: the ethtool advertisement settings * * A small helper function that translates ethtool advertisement * settings to phy autonegotiation advertisements for the * MII_CTRL1000 register when in 1000Base-X mode. */ -static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv) +static inline u32 ethtool_adv_to_mii_adv_x(u32 ethadv) { u32 result = 0; @@ -382,14 +398,14 @@ static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv) } /** - * mii_adv_to_ethtool_1000X + * mii_adv_to_ethtool_adv_x * @adv: value of the MII_CTRL1000 register * * A small helper function that translates MII_CTRL1000 * bits, when in 1000Base-X mode, to ethtool * advertisement settings. */ -static inline u32 mii_adv_to_ethtool_1000X(u32 adv) +static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) { u32 result = 0; @@ -405,6 +421,24 @@ static inline u32 mii_adv_to_ethtool_1000X(u32 adv) return result; } +/** + * mii_lpa_to_ethtool_lpa_x + * @adv: value of the MII_LPA register + * + * A small helper function that translates MII_LPA + * bits, when in 1000Base-X mode, to ethtool + * LP advertisement settings. + */ +static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) +{ + u32 result = 0; + + if (lpa & LPA_LPACK) + result |= ADVERTISED_Autoneg; + + return result | mii_adv_to_ethtool_adv_x(lpa); +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly There is no reason to export two functions for entering the refrigerator. Calling refrigerator() instead of try_to_freeze() doesn't save anything noticeable or removes any race condition. * Rename refrigerator() to __refrigerator() and make it return bool indicating whether it scheduled out for freezing. * Update try_to_freeze() to return bool and relay the return value of __refrigerator() if freezing(). * Convert all refrigerator() users to try_to_freeze(). * Update documentation accordingly. * While at it, add might_sleep() to try_to_freeze(). Signed-off-by: Tejun Heo Cc: Samuel Ortiz Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Steven Whitehouse Cc: Andrew Morton Cc: Jan Kara Cc: KONISHI Ryusuke Cc: Christoph Hellwig --- include/linux/freezer.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a5386e3ee756..7a9427e9fe47 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,18 +47,17 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern void refrigerator(void); +extern bool __refrigerator(void); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); -static inline int try_to_freeze(void) +static inline bool try_to_freeze(void) { - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; + might_sleep(); + if (likely(!freezing(current))) + return false; + return __refrigerator(); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -181,12 +180,12 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void refrigerator(void) {} +static inline bool __refrigerator(void) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} -static inline int try_to_freeze(void) { return 0; } +static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} -- cgit v1.2.3 From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: implement and use kthread_freezable_should_stop() Writeback and thinkpad_acpi have been using thaw_process() to prevent deadlock between the freezer and kthread_stop(); unfortunately, this is inherently racy - nothing prevents freezing from happening between thaw_process() and kthread_stop(). This patch implements kthread_freezable_should_stop() which enters refrigerator if necessary but is guaranteed to return if kthread_stop() is invoked. Both thaw_process() users are converted to use the new function. Note that this deadlock condition exists for many of freezable kthreads. They need to be converted to use the new should_stop or freezable workqueue. Tested with synthetic test case. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Cc: Jens Axboe Cc: Oleg Nesterov --- include/linux/freezer.h | 6 +++--- include/linux/kthread.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7a9427e9fe47..d02b78448b0f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,7 +47,7 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern bool __refrigerator(void); +extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); @@ -57,7 +57,7 @@ static inline bool try_to_freeze(void) might_sleep(); if (likely(!freezing(current))) return false; - return __refrigerator(); + return __refrigerator(false); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -180,7 +180,7 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline bool __refrigerator(void) { return false; } +static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 5cac19b3a266..0714b24c0e45 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -35,6 +35,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void kthread_bind(struct task_struct *k, unsigned int cpu); int kthread_stop(struct task_struct *k); int kthread_should_stop(void); +bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); int kthreadd(void *unused); -- cgit v1.2.3 From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: rename thaw_process() to __thaw_task() and simplify the implementation thaw_process() now has only internal users - system and cgroup freezers. Remove the unnecessary return value, rename, unexport and collapse __thaw_process() into it. This will help further updates to the freezer code. -v3: oom_kill grew a use of thaw_process() while this patch was pending. Convert it to use __thaw_task() for now. In the longer term, this should be handled by allowing tasks to die if killed even if it's frozen. -v2: minor style update as suggested by Matt. Signed-off-by: Tejun Heo Cc: Paul Menage Cc: Matt Helsley --- include/linux/freezer.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index d02b78448b0f..ba4f512d2938 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -45,7 +45,7 @@ static inline bool should_send_signal(struct task_struct *p) } /* Takes and releases task alloc lock using task_lock() */ -extern int thaw_process(struct task_struct *p); +extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); @@ -178,7 +178,6 @@ static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} -static inline int thaw_process(struct task_struct *p) { return 1; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } -- cgit v1.2.3 From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: kill PF_FREEZING With the previous changes, there's no meaningful difference between PF_FREEZING and PF_FROZEN. Remove PF_FREEZING and use PF_FROZEN instead in task_contributes_to_load(). Signed-off-by: Tejun Heo --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2c..d12bd03b688f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FREEZING) == 0) + (task->flags & PF_FROZEN) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1773,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -- cgit v1.2.3 From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: clean up freeze_processes() failure path freeze_processes() failure path is rather messy. Freezing is canceled for workqueues and tasks which aren't frozen yet but frozen tasks are left alone and should be thawed by the caller and of course some callers (xen and kexec) didn't do it. This patch updates __thaw_task() to handle cancelation correctly and makes freeze_processes() and freeze_kernel_threads() call thaw_processes() on failure instead so that the system is fully thawed on failure. Unnecessary [suspend_]thaw_processes() calls are removed from kernel/power/hibernate.c, suspend.c and user.c. While at it, restructure error checking if clause in suspend_prepare() to be less weird. -v2: Srivatsa spotted missing removal of suspend_thaw_processes() in suspend_prepare() and error in commit message. Updated. Signed-off-by: Tejun Heo Acked-by: Srivatsa S. Bhat --- include/linux/freezer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index ba4f512d2938..93f411a52872 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -61,7 +61,6 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); -extern void cancel_freezing(struct task_struct *p); #ifdef CONFIG_CGROUP_FREEZER extern int cgroup_freezing_or_frozen(struct task_struct *task); -- cgit v1.2.3 From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: cgroup_freezer: prepare for removal of TIF_FREEZE TIF_FREEZE will be removed soon and freezing() will directly test whether any freezing condition is in effect. Make the following changes in preparation. * Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it return bool. * Make cgroup_freezing() access task_freezer() under rcu read lock instead of task_lock(). This makes the state dereferencing racy against task moving to another cgroup; however, it was already racy without this change as ->state dereference wasn't synchronized. This will be later dealt with using attach hooks. * freezer->state is now set before trying to push tasks into the target state. -v2: Oleg pointed out that freeze_change_state() was setting freeze->state incorrectly to CGROUP_FROZEN instead of CGROUP_FREEZING. Fixed. -v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke try_to_freeze_cgroup() regardless of the current state. Patch updated such that the actual freeze/thaw operations are always performed on invocation. This shouldn't make any difference unless something is broken. Signed-off-by: Tejun Heo Acked-by: Paul Menage Cc: Li Zefan Cc: Oleg Nesterov --- include/linux/freezer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 93f411a52872..b2b4abc5a739 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -63,11 +63,11 @@ static inline bool try_to_freeze(void) extern bool freeze_task(struct task_struct *p, bool sig_only); #ifdef CONFIG_CGROUP_FREEZER -extern int cgroup_freezing_or_frozen(struct task_struct *task); +extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ -static inline int cgroup_freezing_or_frozen(struct task_struct *task) +static inline bool cgroup_freezing(struct task_struct *task) { - return 0; + return false; } #endif /* !CONFIG_CGROUP_FREEZER */ -- cgit v1.2.3 From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE Using TIF_FREEZE for freezing worked when there was only single freezing condition (the PM one); however, now there is also the cgroup_freezer and single bit flag is getting clumsy. thaw_processes() is already testing whether cgroup freezing in in effect to avoid thawing tasks which were frozen by both PM and cgroup freezers. This is racy (nothing prevents race against cgroup freezing) and fragile. A much simpler way is to test actual freeze conditions from freezing() - ie. directly test whether PM or cgroup freezing is in effect. This patch adds variables to indicate whether and what type of freezing conditions are in effect and reimplements freezing() such that it directly tests whether any of the two freezing conditions is active and the task should freeze. On fast path, freezing() is still very cheap - it only tests system_freezing_cnt. This makes the clumsy dancing aroung TIF_FREEZE unnecessary and freeze/thaw operations more usual - updating state variables for the new state and nudging target tasks so that they notice the new state and comply. As long as the nudging happens after state update, it's race-free. * This allows use of freezing() in freeze_task(). Replace the open coded tests with freezing(). * p != current test is added to warning printing conditions in try_to_freeze_tasks() failure path. This is necessary as freezing() is now true for the task which initiated freezing too. -v2: Oleg pointed out that re-freezing FROZEN cgroup could increment system_freezing_cnt. Fixed. Signed-off-by: Tejun Heo Acked-by: Paul Menage (for the cgroup portions) --- include/linux/freezer.h | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b2b4abc5a739..8e29f2b7ce11 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -5,8 +5,13 @@ #include #include +#include #ifdef CONFIG_FREEZER +extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ +extern bool pm_freezing; /* PM freezing in effect */ +extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ + /* * Check if a process has been frozen */ @@ -15,28 +20,16 @@ static inline int frozen(struct task_struct *p) return p->flags & PF_FROZEN; } -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_FREEZE); -} +extern bool freezing_slow_path(struct task_struct *p); /* - * Request that a process be frozen - */ -static inline void set_freeze_flag(struct task_struct *p) -{ - set_tsk_thread_flag(p, TIF_FREEZE); -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request + * Check if there is a request to freeze a process */ -static inline void clear_freeze_flag(struct task_struct *p) +static inline bool freezing(struct task_struct *p) { - clear_tsk_thread_flag(p, TIF_FREEZE); + if (likely(!atomic_read(&system_freezing_cnt))) + return false; + return freezing_slow_path(p); } static inline bool should_send_signal(struct task_struct *p) @@ -174,9 +167,7 @@ static inline void set_freezable_with_signal(void) }) #else /* !CONFIG_FREEZER */ static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void set_freeze_flag(struct task_struct *p) {} -static inline void clear_freeze_flag(struct task_struct *p) {} +static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } -- cgit v1.2.3 From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: remove should_send_signal() and update frozen() should_send_signal() is only used in freezer.c. Exporting them only increases chance of abuse. Open code the two users and remove it. Update frozen() to return bool. Signed-off-by: Tejun Heo --- include/linux/freezer.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8e29f2b7ce11..3d50913d39d0 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -15,7 +15,7 @@ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Check if a process has been frozen */ -static inline int frozen(struct task_struct *p) +static inline bool frozen(struct task_struct *p) { return p->flags & PF_FROZEN; } @@ -32,11 +32,6 @@ static inline bool freezing(struct task_struct *p) return freezing_slow_path(p); } -static inline bool should_send_signal(struct task_struct *p) -{ - return !(p->flags & PF_FREEZER_NOSIG); -} - /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); @@ -166,7 +161,7 @@ static inline void set_freezable_with_signal(void) __retval; \ }) #else /* !CONFIG_FREEZER */ -static inline int frozen(struct task_struct *p) { return 0; } +static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } -- cgit v1.2.3 From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: fix set_freezable[_with_signal]() race A kthread doing set_freezable*() may race with on-going PM freeze and the freezer might think all tasks are frozen while the new freezable kthread is merrily proceeding to execute code paths which aren't supposed to be executing during PM freeze. Reimplement set_freezable[_with_signal]() using __set_freezable() such that freezable PF flags are modified under freezer_lock and try_to_freeze() is called afterwards. This eliminates race condition against freezing. Note: Separated out from larger patch to resolve fix order dependency Oleg pointed out. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- include/linux/freezer.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 3d50913d39d0..a0f1b3a3604f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,6 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -106,18 +107,18 @@ static inline int freezer_should_skip(struct task_struct *p) /* * Tell the freezer that the current task should be frozen by it */ -static inline void set_freezable(void) +static inline bool set_freezable(void) { - current->flags &= ~PF_NOFREEZE; + return __set_freezable(false); } /* * Tell the freezer that the current task should be frozen by it and that it * should send a fake signal to the task to freeze it. */ -static inline void set_freezable_with_signal(void) +static inline bool set_freezable_with_signal(void) { - current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); + return __set_freezable(true); } /* -- cgit v1.2.3 From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: remove unused @sig_only from freeze_task() After "freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE", freezing() returns authoritative answer on whether the current task should freeze or not and freeze_task() doesn't need or use @sig_only. Remove it. While at it, rewrite function comment for freeze_task() and rename @sig_only to @user_only in try_to_freeze_tasks(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- include/linux/freezer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a0f1b3a3604f..a28842e588f4 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -48,7 +48,7 @@ static inline bool try_to_freeze(void) return __refrigerator(false); } -extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool freeze_task(struct task_struct *p); extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER -- cgit v1.2.3 From 8b60b07805d557542160d852874fa6a1b969184e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 11 Oct 2011 10:59:02 -0700 Subject: cfg80211: process regulatory DFS region for countries The wireless-regdb now has support for mapping a country to one DFS region. CRDA sends this to us now so process it so we can provide that hint to drivers. This will later be used by code for processing DFS in a way that meets the criteria for the DFS region the country belongs to. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f9261c253735..6396819a7e41 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1170,6 +1170,10 @@ enum nl80211_commands { * probe-response frame. The DA field in the 802.11 header is zero-ed out, * to be filled by the FW. * + * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country + * abides to when initiating radiation on DFS channels. A country maps + * to one DFS region. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1408,6 +1412,8 @@ enum nl80211_attrs { NL80211_ATTR_PROBE_RESP, + NL80211_ATTR_DFS_REGION, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1916,6 +1922,21 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IBSS = 1<<8, }; +/** + * enum nl80211_dfs_regions - regulatory DFS regions + * + * @NL80211_DFS_UNSET: Country has no DFS master region specified + * @NL80211_DFS_FCC_: Country follows DFS master rules from FCC + * @NL80211_DFS_FCC_: Country follows DFS master rules from ETSI + * @NL80211_DFS_JP_: Country follows DFS master rules from JP/MKK/Telec + */ +enum nl80211_dfs_regions { + NL80211_DFS_UNSET = 0, + NL80211_DFS_FCC = 1, + NL80211_DFS_ETSI = 2, + NL80211_DFS_JP = 3, +}; + /** * enum nl80211_survey_info - survey information * -- cgit v1.2.3 From 7e7c8926b2f4e3453b8aeb39cd814d2af3fec24f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 18 Nov 2011 11:31:59 -0800 Subject: wireless: Support ht-capabilities over-rides. This allows users to disable features such as HT, HT40, and to modify the MCS, AMPDU, and AMSDU settings for drivers that support it. The MCS, AMPDU, and AMSDU features that may be disabled are are reported in the phy-info netlink message as a mask. Attemping to disable features that are not supported will take no affect, but will not return errors. This is to aid backwards compatibility in user-space apps that may not be clever enough to deal with parsing the the capabilities mask. This patch only enables the infrastructure. An additional patch will enable the feature in mac80211. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- include/linux/nl80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 6396819a7e41..97bfebfcce90 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1169,6 +1169,17 @@ enum nl80211_commands { * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire * probe-response frame. The DA field in the 802.11 header is zero-ed out, * to be filled by the FW. + * @NL80211_ATTR_DISABLE_HT: Force HT capable interfaces to disable + * this feature. Currently, only supported in mac80211 drivers. + * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the + * ATTR_HT_CAPABILITY to which attention should be paid. + * Currently, only mac80211 NICs support this feature. + * The values that may be configured are: + * MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40 + * AMPDU density and AMPDU factor. + * All values are treated as suggestions and may be ignored + * by the driver as required. The actual values may be seen in + * the station debugfs ht_caps file. * * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country * abides to when initiating radiation on DFS channels. A country maps @@ -1414,6 +1425,9 @@ enum nl80211_attrs { NL80211_ATTR_DFS_REGION, + NL80211_ATTR_DISABLE_HT, + NL80211_ATTR_HT_CAPABILITY_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From a2d7ec58ac09f30ab726f216827f7c7095b2a98f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Nov 2011 17:32:46 +0000 Subject: netfilter: use jump_label for nf_hooks On configs where CONFIG_JUMP_LABEL=y, we can replace in fast path a load/compare/conditional jump by a single jump with no dcache reference. Jump target is modified as soon as nf_hooks[pf][hook] switches from empty state to non empty states. jump_label state is kept outside of nf_hooks array so has no cost on cpu caches. This patch removes the test on CONFIG_NETFILTER_DEBUG : No need to call nf_hook_slow() at all if nf_hooks[pf][hook] is empty, this didnt give useful information, but slowed down things a lot. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 857f5026ced6..b809265607d0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -162,6 +162,24 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#if defined(CONFIG_JUMP_LABEL) +#include +extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + if (__builtin_constant_p(pf) && + __builtin_constant_p(hook)) + return static_branch(&nf_hooks_needed[pf][hook]); + + return !list_empty(&nf_hooks[pf][hook]); +} +#else +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + return !list_empty(&nf_hooks[pf][hook]); +} +#endif + int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -179,11 +197,9 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { -#ifndef CONFIG_NETFILTER_DEBUG - if (list_empty(&nf_hooks[pf][hook])) - return 1; -#endif - return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + if (nf_hooks_active(pf, hook)) + return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + return 1; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, -- cgit v1.2.3 From 1e5f9a23430e64fb56d9d5d8e1ca165ba1cfeb75 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 5 Oct 2011 14:40:59 +0100 Subject: ARM: amba: Move definition of struct amba_id to mod_devicetable.h The general kernel infrastructure for adding module alises during module post processing expects the affected device type identification structures in a common header . This patch simple moves struct amba_id to the common header, and adds the appropriate include in . Signed-off-by: Dave Martin --- include/linux/amba/bus.h | 7 +------ include/linux/mod_devicetable.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index fcbbe71a3cc1..724c69c40bb8 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -35,12 +36,6 @@ struct amba_device { unsigned int irq[AMBA_NR_IRQS]; }; -struct amba_id { - unsigned int id; - unsigned int mask; - void *data; -}; - struct amba_driver { struct device_driver drv; int (*probe)(struct amba_device *, const struct amba_id *); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 468819cdde87..83ac0713ed0a 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -542,4 +542,22 @@ struct isapnp_device_id { kernel_ulong_t driver_data; /* data private to the driver */ }; +/** + * struct amba_id - identifies a device on an AMBA bus + * @id: The significant bits if the hardware device ID + * @mask: Bitmask specifying which bits of the id field are significant when + * matching. A driver binds to a device when ((hardware device ID) & mask) + * == id. + * @data: Private data used by the driver. + */ +struct amba_id { + unsigned int id; + unsigned int mask; +#ifndef __KERNEL__ + kernel_ulong_t data; +#else + void *data; +#endif +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 570e57bcbcc4df5581b1e9c806ab2b16e96ea7d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Nov 2011 19:51:34 +0000 Subject: atm: use SKB_TRUESIZE() in atm_guess_pdu2truesize() SKB_TRUESIZE() provides a better approximation of expected skb truesize. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 49a83ca900ba..43ea1b2de3ee 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -452,7 +452,7 @@ void atm_dev_release_vccs(struct atm_dev *dev); static inline int atm_guess_pdu2truesize(int size) { - return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); + return SKB_TRUESIZE(size); } -- cgit v1.2.3 From 5bc1421e34ecfe0bd4b26dc3232b7d5e25179144 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 22 Nov 2011 05:10:51 +0000 Subject: net: add network priority cgroup infrastructure (v4) This patch adds in the infrastructure code to create the network priority cgroup. The cgroup, in addition to the standard processes file creates two control files: 1) prioidx - This is a read-only file that exports the index of this cgroup. This is a value that is both arbitrary and unique to a cgroup in this subsystem, and is used to index the per-device priority map 2) priomap - This is a writeable file. On read it reports a table of 2-tuples where name is the name of a network interface and priority is indicates the priority assigned to frames egresessing on the named interface and originating from a pid in this cgroup This cgroup allows for skb priority to be set prior to a root qdisc getting selected. This is benenficial for DCB enabled systems, in that it allows for any application to use dcb configured priorities so without application modification Signed-off-by: Neil Horman Signed-off-by: John Fastabend CC: Robert Love CC: "David S. Miller" Signed-off-by: David S. Miller --- include/linux/cgroup_subsys.h | 8 ++++++++ include/linux/netdevice.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac663c18776c..0bd390ce98b2 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -59,8 +59,16 @@ SUBSYS(net_cls) SUBSYS(blkio) #endif +/* */ + #ifdef CONFIG_CGROUP_PERF SUBSYS(perf) #endif /* */ + +#ifdef CONFIG_NETPRIO_CGROUP +SUBSYS(net_prio) +#endif + +/* */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3eb383a9b5ed..999bb264fe27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -50,6 +50,7 @@ #ifdef CONFIG_DCB #include #endif +#include #include @@ -1244,6 +1245,9 @@ struct net_device { #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; +#endif +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) + struct netprio_map __rcu *priomap; #endif /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; -- cgit v1.2.3 From 5eccdf5e06eb67779716ae26142402a1ae9b012c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 21 Nov 2011 06:53:46 +0000 Subject: tc: comment spelling fixes Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index c5336705921f..7281d5acf2f9 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -30,7 +30,7 @@ */ struct tc_stats { - __u64 bytes; /* NUmber of enqueues bytes */ + __u64 bytes; /* Number of enqueued bytes */ __u32 packets; /* Number of enqueued packets */ __u32 drops; /* Packets dropped because of lack of resources */ __u32 overlimits; /* Number of throttle events when this @@ -297,7 +297,7 @@ struct tc_htb_glob { __u32 debug; /* debug flags */ /* stats */ - __u32 direct_pkts; /* count of non shapped packets */ + __u32 direct_pkts; /* count of non shaped packets */ }; enum { TCA_HTB_UNSPEC, @@ -503,7 +503,7 @@ enum { }; #define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) -/* State transition probablities for 4 state model */ +/* State transition probabilities for 4 state model */ struct tc_netem_gimodel { __u32 p13; __u32 p31; -- cgit v1.2.3 From 4e3fd7a06dc20b2d8ec6892233ad2012968fe7b6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 21 Nov 2011 03:39:03 +0000 Subject: net: remove ipv6_addr_copy() C assignment can handle struct in6_addr copying. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3d8f9c44e27d..f15fd985b08a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -237,7 +237,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; dsin6->sin6_family = ssin6->sin6_family; - ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + dsin6->sin6_addr = ssin6->sin6_addr; return true; } #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -- cgit v1.2.3 From 1f2149c1df50c8c712950872675f46e6e44629f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Nov 2011 10:57:41 +0000 Subject: net: remove netdev_alloc_page and use __GFP_COLD Given we dont use anymore the struct net_device *dev argument, and this interface brings litle benefit, remove netdev_{alloc|free}_page(), to debloat include/linux/skbuff.h a bit. (Some drivers used a mix of these interfaces and alloc_pages()) When allocating a page given to device for DMA transfer (device to memory), it makes sense to use a cold one (__GFP_COLD) Signed-off-by: Eric Dumazet CC: Jeff Kirsher CC: Dimitris Michailidis Signed-off-by: David S. Miller --- include/linux/skbuff.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 09b7ea566d66..cec0657d0d32 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1668,38 +1668,6 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } -/** - * __netdev_alloc_page - allocate a page for ps-rx on a specific device - * @dev: network device to receive on - * @gfp_mask: alloc_pages_node mask - * - * Allocate a new page. dev currently unused. - * - * %NULL is returned if there is no free memory. - */ -static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) -{ - return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0); -} - -/** - * netdev_alloc_page - allocate a page for ps-rx on a specific device - * @dev: network device to receive on - * - * Allocate a new page. dev currently unused. - * - * %NULL is returned if there is no free memory. - */ -static inline struct page *netdev_alloc_page(struct net_device *dev) -{ - return __netdev_alloc_page(dev, GFP_ATOMIC); -} - -static inline void netdev_free_page(struct net_device *dev, struct page *page) -{ - __free_page(page); -} - /** * skb_frag_page - retrieve the page refered to by a paged fragment * @frag: the paged fragment -- cgit v1.2.3 From 5151412dd4338b273afdb107c3772528e9e67d92 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Nov 2011 10:59:13 +0100 Subject: block: initialize request_queue's numa node during struct request_queue is allocated with __GFP_ZERO so its "node" field is zero before initialization. This causes an oops if node 0 is offline in the page allocator because its zonelists are not initialized. From Dave Young's dmesg: SRAT: Node 1 PXM 2 0-d0000000 SRAT: Node 1 PXM 2 100000000-330000000 SRAT: Node 0 PXM 1 330000000-630000000 Initmem setup node 1 0000000000000000-000000000affb000 ... Built 1 zonelists in Node order, mobility grouping on. ... BUG: unable to handle kernel paging request at 0000000000001c08 IP: [] __alloc_pages_nodemask+0xb5/0x870 and __alloc_pages_nodemask+0xb5 translates to a NULL pointer on zonelist->_zonerefs. The fix is to initialize q->node at the time of allocation so the correct node is passed to the slab allocator later. Since blk_init_allocated_queue_node() is no longer needed, merge it with blk_init_allocated_queue(). [rientjes@google.com: changelog, initializing q->node] Cc: stable@vger.kernel.org [2.6.37+] Reported-by: Dave Young Signed-off-by: Mike Snitzer Signed-off-by: David Rientjes Tested-by: Dave Young Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c7a6d3b5bc7b..94acd8172b5b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -805,9 +805,6 @@ extern void blk_unprep_request(struct request *); */ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, - request_fn_proc *, - spinlock_t *, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern struct request_queue *blk_init_allocated_queue(struct request_queue *, request_fn_proc *, spinlock_t *); -- cgit v1.2.3 From 67820021dc9c8da37f773025190280f55f3626d4 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 23 Nov 2011 11:33:07 +0100 Subject: i2c: Delete ANY_I2C_BUS Last piece of code using ANY_I2C_BUS was deleted almost 2 years ago, so ANY_I2C_BUS can go away as well. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a81bf6d23b3e..07d103a06d64 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -432,9 +432,6 @@ void i2c_unlock_adapter(struct i2c_adapter *); /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -/* The numbers to use to set I2C bus address */ -#define ANY_I2C_BUS 0xffff - /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */ #define I2C_ADDRS(addr, addrs...) \ ((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END }) -- cgit v1.2.3 From 780dc9ba4eb682a89be48d5b814feae6722a19e0 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Tue, 8 Nov 2011 18:54:10 +0530 Subject: regulator: TPS65910: Fix VDD1/2 voltage selector count Count of selector voltage is required for regulator_set_voltage to work via set_voltage_sel. VDD1/2 currently have it as zero, so regulator_set_voltage won't work for VDD1/2. Update count (n_voltages) for VDD1/2. Output Voltage = (step value * 12.5 mV + 562.5 mV) * gain With above expr, number of voltages that can be selected is step value count * gain count constant for gain count will be called VDD1_2_NUM_VOLT_COARSE existing constant for step value count is VDD1_2_NUM_VOLTS, use VDD1_2_NUM_VOLT_FINE instead to make clear that step value is not the only component in deciding selectable voltage count Signed-off-by: Afzal Mohammed Signed-off-by: Mark Brown --- include/linux/mfd/tps65910.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 82b4c8801a4f..8bf2cb9502dd 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -243,7 +243,8 @@ /*Registers VDD1, VDD2 voltage values definitions */ -#define VDD1_2_NUM_VOLTS 73 +#define VDD1_2_NUM_VOLT_FINE 73 +#define VDD1_2_NUM_VOLT_COARSE 3 #define VDD1_2_MIN_VOLT 6000 #define VDD1_2_OFFSET 125 -- cgit v1.2.3 From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: kill unused set_freezable_with_signal() There's no in-kernel user of set_freezable_with_signal() left. Mixing TIF_SIGPENDING with kernel threads can lead to nasty corner cases as kernel threads never travel signal delivery path on their own. e.g. the current implementation is buggy in the cancelation path of __thaw_task(). It calls recalc_sigpending_and_wake() in an attempt to clear TIF_SIGPENDING but the function never clears it regardless of sigpending state. This means that signallable freezable kthreads may continue executing with !freezing() && stuck TIF_SIGPENDING, which can be troublesome. This patch removes set_freezable_with_signal() along with PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer. User tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious sigpending is dealt with in the usual signal delivery path. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- include/linux/freezer.h | 20 +------------------- include/linux/sched.h | 1 - 2 files changed, 1 insertion(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a28842e588f4..a33550fc05c5 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,7 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p); -extern bool __set_freezable(bool with_signal); +extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -104,23 +104,6 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } -/* - * Tell the freezer that the current task should be frozen by it - */ -static inline bool set_freezable(void) -{ - return __set_freezable(false); -} - -/* - * Tell the freezer that the current task should be frozen by it and that it - * should send a fake signal to the task to freeze it. - */ -static inline bool set_freezable_with_signal(void) -{ - return __set_freezable(true); -} - /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -176,7 +159,6 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} -static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index d12bd03b688f..2f90470ad843 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1788,7 +1788,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other -- cgit v1.2.3 From 24b7ead3fb0bae267c2ee50898eb4c13aedd1e9f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: fix wait_event_freezable/__thaw_task races wait_event_freezable() and friends stop the waiting if try_to_freeze() fails. This is not right, we can race with __thaw_task() and in this case - wait_event_freezable() returns the wrong ERESTARTSYS - wait_event_freezable_timeout() can return the positive value while condition == F Change the code to always check __retval/condition before return. Note: with or without this patch the timeout logic looks strange, probably we should recalc timeout if try_to_freeze() returns T. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/freezer.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a33550fc05c5..09570ac22be6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -122,28 +122,30 @@ static inline int freezer_should_skip(struct task_struct *p) #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible(wq, \ (condition) || freezing(current)); \ - if (__retval && !freezing(current)) \ + if (__retval || (condition)) \ break; \ - else if (!(condition)) \ - __retval = -ERESTARTSYS; \ - } while (try_to_freeze()); \ + try_to_freeze(); \ + } \ __retval; \ }) - #define wait_event_freezable_timeout(wq, condition, timeout) \ ({ \ long __retval = timeout; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible_timeout(wq, \ (condition) || freezing(current), \ __retval); \ - } while (try_to_freeze()); \ + if (__retval <= 0 || (condition)) \ + break; \ + try_to_freeze(); \ + } \ __retval; \ }) + #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } -- cgit v1.2.3 From b84d435cc228e87951f3bbabf6cc4a5f25d5fb16 Mon Sep 17 00:00:00 2001 From: Christine Chan Date: Mon, 7 Nov 2011 19:48:27 -0800 Subject: debugobjects: Extend to assert that an object is initialized Calling del_timer_sync() on an uninitialized timer leads to a never ending loop in lock_timer_base() that spins checking for a non-NULL timer base. Add an assertion to debugobjects to catch usage of uninitialized objects so that we can initialize timers in the del_timer_sync() path before it calls lock_timer_base(). [ sboyd@codeaurora.org: Clarify commit message ] Signed-off-by: Christine Chan Signed-off-by: Stephen Boyd Cc: John Stultz Link: http://lkml.kernel.org/r/1320724108-20788-3-git-send-email-sboyd@codeaurora.org Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/debugobjects.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 65970b811e22..0e5f5785d9f2 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -46,6 +46,8 @@ struct debug_obj { * fails * @fixup_free: fixup function, which is called when the free check * fails + * @fixup_assert_init: fixup function, which is called when the assert_init + * check fails */ struct debug_obj_descr { const char *name; @@ -54,6 +56,7 @@ struct debug_obj_descr { int (*fixup_activate) (void *addr, enum debug_obj_state state); int (*fixup_destroy) (void *addr, enum debug_obj_state state); int (*fixup_free) (void *addr, enum debug_obj_state state); + int (*fixup_assert_init)(void *addr, enum debug_obj_state state); }; #ifdef CONFIG_DEBUG_OBJECTS @@ -64,6 +67,7 @@ extern void debug_object_activate (void *addr, struct debug_obj_descr *descr); extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); extern void debug_object_free (void *addr, struct debug_obj_descr *descr); +extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr); /* * Active state: @@ -89,6 +93,8 @@ static inline void debug_object_destroy (void *addr, struct debug_obj_descr *descr) { } static inline void debug_object_free (void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { } static inline void debug_objects_early_init(void) { } static inline void debug_objects_mem_init(void) { } -- cgit v1.2.3 From 6a76b7a9cc93dec6ae58d70f1257d234291908e0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Mon, 21 Nov 2011 23:32:35 +0100 Subject: PM / Memory-hotplug: Avoid task freezing failures The lock_system_sleep() function is used in the memory hotplug code at several places in order to implement mutual exclusion with hibernation. However, this function tries to acquire the 'pm_mutex' lock using mutex_lock() and hence blocks in TASK_UNINTERRUPTIBLE state if it doesn't get the lock. This would lead to task freezing failures and hence hibernation failure as a consequence, even though the hibernation call path successfully acquired the lock. But it is to be noted that, since this task tries to acquire pm_mutex, if it blocks due to this, we are *100% sure* that this task is not going to run as long as hibernation sequence is in progress, since hibernation releases 'pm_mutex' only at the very end, when everything is done. And this means, this task is going to be anyway blocked for much more longer than what the freezer intends to achieve; which means, freezing and thawing doesn't really make any difference to this task! So, to fix freezing failures, we just ask the freezer to skip freezing this task, since it is already "frozen enough". But instead of calling freezer_do_not_count() and freezer_count() as it is, we use only the relevant parts of those functions, because restrictions such as 'the task should be a userspace one' etc., might not be relevant in this scenario. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 57a692432f8a..1f7fff47cfac 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -380,12 +380,16 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { + /* simplified freezer_do_not_count() */ + current->flags |= PF_FREEZER_SKIP; mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); + /* simplified freezer_count() */ + current->flags &= ~PF_FREEZER_SKIP; } #endif -- cgit v1.2.3 From fe1a7fe2c4456679b3402f04268bdfafca7b127a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 15 Nov 2011 16:17:18 +0200 Subject: virtio-mmio: Correct the name of the guest features selector Guest features selector spelling mistake. Cc: Pawel Moll Cc: Rusty Russell Cc: virtualization@lists.linux-foundation.org Signed-off-by: Sasha Levin Signed-off-by: Rusty Russell --- include/linux/virtio_mmio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h index 27c7edefbc86..5c7b6f0daef8 100644 --- a/include/linux/virtio_mmio.h +++ b/include/linux/virtio_mmio.h @@ -63,7 +63,7 @@ #define VIRTIO_MMIO_GUEST_FEATURES 0x020 /* Activated features set selector - Write Only */ -#define VIRTIO_MMIO_GUEST_FEATURES_SET 0x024 +#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 /* Guest's memory page size in bytes - Write Only */ #define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 -- cgit v1.2.3 From e6af578c5305be693a1bc7f4dc7b51dd82d41425 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 17 Nov 2011 17:41:15 +0200 Subject: virtio-pci: make reset operation safer virtio pci device reset actually just does an I/O write, which in PCI is really posted, that is it can complete on CPU before the device has received it. Further, interrupts might have been pending on another CPU, so device callback might get invoked after reset. This conflicts with how drivers use reset, which is typically: reset unregister a callback running after reset completed can race with unregister, potentially leading to use after free bugs. Fix by flushing out the write, and flushing pending interrupts. This assumes that device is never reset from its vq/config callbacks, or in parallel with being added/removed, document this assumption. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index add4790b21fe..e9e72bda1b72 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -85,6 +85,8 @@ * @reset: reset the device * vdev: the virtio device * After this, status and feature negotiation must be done again + * Device must not be reset from its vq/config callbacks, or in + * parallel with being added/removed. * @find_vqs: find virtqueues and instantiate them. * vdev: the virtio_device * nvqs: the number of virtqueues to find -- cgit v1.2.3 From 62c9ea6b120688d800b4d892eaf737c20a73e86b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 25 Nov 2011 00:44:55 +0100 Subject: Freezer: fix more fallout from the thaw_process rename Commit 944e192db53c "freezer: rename thaw_process() to __thaw_task() and simplify the implementation" did not create a !CONFIG_FREEZER version of __thaw_task(). Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 09570ac22be6..c1ee2833655e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -149,6 +149,7 @@ static inline int freezer_should_skip(struct task_struct *p) #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } +static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } -- cgit v1.2.3 From cf50dcc24f82a6dc2bce523eec2a979eb1b106e2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 25 Nov 2011 14:32:52 +0000 Subject: dsa: Change dsa_uses_{dsa, trailer}_tags() into inline functions eth_type_trans() will use these functions if DSA is enabled, which blocks building DSA as a module. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 999bb264fe27..63721a69804c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1080,7 +1080,7 @@ struct net_device { struct vlan_group __rcu *vlgrp; /* VLAN group */ #endif #ifdef CONFIG_NET_DSA - void *dsa_ptr; /* dsa specific data */ + struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ #endif void *atalk_ptr; /* AppleTalk link */ struct in_device __rcu *ip_ptr; /* IPv4 specific data */ -- cgit v1.2.3 From 34a430d7bd26b35ca3a7d3fc83663de8ea6e33f6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 25 Nov 2011 14:38:38 +0000 Subject: dsa: Allow core and drivers to be built as modules Change the kconfig types to tristate and adjust the condition for declaring net_device::dsa_ptr to allow for this. Adjust the makefile so that if NET_DSA_MV88E6123_61_65=y and NET_DSA_MV88E6131=m or vice versa then both drivers are built-in. We could leave these options as bool and make NET_DSA_MV88E6XXX a user-selected option, but that would break existing configurations. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 63721a69804c..87f7353a2407 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1079,7 +1079,7 @@ struct net_device { #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) struct vlan_group __rcu *vlgrp; /* VLAN group */ #endif -#ifdef CONFIG_NET_DSA +#if IS_ENABLED(CONFIG_NET_DSA) struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ #endif void *atalk_ptr; /* AppleTalk link */ -- cgit v1.2.3 From d11ead75672d655652dbfc1b1a2c359e5b65536d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 25 Nov 2011 14:40:26 +0000 Subject: net: Use IS_ENABLED() in netdevice.h as appropriate Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 87f7353a2407..ac9a4b9344ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -144,22 +144,20 @@ static inline bool dev_xmit_complete(int rc) * used. */ -#if defined(CONFIG_WLAN) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) # if defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 # else # define LL_MAX_HEADER 96 # endif -#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) +#elif IS_ENABLED(CONFIG_TR) # define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 #endif -#if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \ - !defined(CONFIG_NET_IPGRE) && !defined(CONFIG_NET_IPGRE_MODULE) && \ - !defined(CONFIG_IPV6_SIT) && !defined(CONFIG_IPV6_SIT_MODULE) && \ - !defined(CONFIG_IPV6_TUNNEL) && !defined(CONFIG_IPV6_TUNNEL_MODULE) +#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ + !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) #define MAX_HEADER LL_MAX_HEADER #else #define MAX_HEADER (LL_MAX_HEADER + 48) @@ -922,7 +920,7 @@ struct net_device_ops { int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); int (*ndo_setup_tc)(struct net_device *dev, u8 tc); -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev, @@ -937,7 +935,7 @@ struct net_device_ops { unsigned int sgc); #endif -#if defined(CONFIG_LIBFCOE) || defined(CONFIG_LIBFCOE_MODULE) +#if IS_ENABLED(CONFIG_LIBFCOE) #define NETDEV_FCOE_WWNN 0 #define NETDEV_FCOE_WWPN 1 int (*ndo_fcoe_get_wwn)(struct net_device *dev, @@ -1076,7 +1074,7 @@ struct net_device { /* Protocol specific pointers */ -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_group __rcu *vlgrp; /* VLAN group */ #endif #if IS_ENABLED(CONFIG_NET_DSA) @@ -1242,7 +1240,7 @@ struct net_device { struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif -- cgit v1.2.3 From b30f8bdcfa7dd05f4268348f3388ff903132f28e Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 21 Nov 2011 08:57:56 +0000 Subject: eeprom_93cx6: Add data direction control. Some devices need to know if the data is to be output or read, so add a data direction into the eeprom structure to tell the driver whether the data line should be driven. The user in this case is the Micrel KS8851 which has a direction control for the EEPROM data line and thus needs to know whether to drive it (writing) or to tristate it for receiving. Signed-off-by: Ben Dooks Cc: Wolfram Sang Cc: Jean Delvare Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- include/linux/eeprom_93cx6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h index c4627cbdb8e0..e04546e9c592 100644 --- a/include/linux/eeprom_93cx6.h +++ b/include/linux/eeprom_93cx6.h @@ -46,6 +46,7 @@ * @register_write(struct eeprom_93cx6 *eeprom): handler to * write to the eeprom register by using all reg_* fields. * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines + * @drive_data: Set if we're driving the data line. * @reg_data_in: register field to indicate data input * @reg_data_out: register field to indicate data output * @reg_data_clock: register field to set the data clock @@ -62,6 +63,7 @@ struct eeprom_93cx6 { int width; + char drive_data; char reg_data_in; char reg_data_out; char reg_data_clock; -- cgit v1.2.3 From 072bc80156729f853e8bcafe1b17c48c74462887 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 21 Nov 2011 08:57:57 +0000 Subject: eeprom_93cx6: Add write support Add support for writing data to EEPROM. Signed-off-by: Ben Dooks Cc: Wolfram Sang Cc: Jean Delvare Cc: Linux Kernel Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- include/linux/eeprom_93cx6.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h index e04546e9c592..e50f98b0297a 100644 --- a/include/linux/eeprom_93cx6.h +++ b/include/linux/eeprom_93cx6.h @@ -33,6 +33,7 @@ #define PCI_EEPROM_WIDTH_93C86 8 #define PCI_EEPROM_WIDTH_OPCODE 3 #define PCI_EEPROM_WRITE_OPCODE 0x05 +#define PCI_EEPROM_ERASE_OPCODE 0x07 #define PCI_EEPROM_READ_OPCODE 0x06 #define PCI_EEPROM_EWDS_OPCODE 0x10 #define PCI_EEPROM_EWEN_OPCODE 0x13 @@ -74,3 +75,8 @@ extern void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, u16 *data); extern void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word, __le16 *data, const u16 words); + +extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable); + +extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, + u8 addr, u16 data); -- cgit v1.2.3 From 49f5ed4250c757cb19d953fcac2737a35ca14d76 Mon Sep 17 00:00:00 2001 From: chas williams - CONTRACTOR Date: Tue, 22 Nov 2011 12:51:56 +0000 Subject: atm: eliminate atm_guess_pdu2truesize() Signed-off-by: Chas Williams - CONTRACTOR Signed-off-by: David S. Miller --- include/linux/atmdev.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 43ea1b2de3ee..f4ff882cb2da 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -445,16 +445,6 @@ void vcc_insert_socket(struct sock *sk); void atm_dev_release_vccs(struct atm_dev *dev); -/* - * This is approximately the algorithm used by alloc_skb. - * - */ - -static inline int atm_guess_pdu2truesize(int size) -{ - return SKB_TRUESIZE(size); -} - static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { -- cgit v1.2.3 From 876f6e67d1c617c098c67934a8d00b065bb9688b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sat, 26 Nov 2011 19:54:58 +0000 Subject: net/mlx4: move RSS related definitions to be global Towards adding RSS support for IB drivers/application who use the mlx4 HW, make the RSS related definitions global and change the mlx4_en driver to use them. Signed-off-by: Or Gerlitz Signed-off-by: Shlomo Pongratz Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 48cc4cb97858..6562ff6aa9d6 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -97,6 +97,33 @@ enum { MLX4_QP_BIT_RIC = 1 << 4, }; +enum { + MLX4_RSS_HASH_XOR = 0, + MLX4_RSS_HASH_TOP = 1, + + MLX4_RSS_UDP_IPV6 = 1 << 0, + MLX4_RSS_UDP_IPV4 = 1 << 1, + MLX4_RSS_TCP_IPV6 = 1 << 2, + MLX4_RSS_IPV6 = 1 << 3, + MLX4_RSS_TCP_IPV4 = 1 << 4, + MLX4_RSS_IPV4 = 1 << 5, + + /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */ + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24, + /* offset of being RSS indirection QP within mlx4_qp_context.flags */ + MLX4_RSS_QPC_FLAG_OFFSET = 13, +}; + +struct mlx4_rss_context { + __be32 base_qpn; + __be32 default_qpn; + u16 reserved; + u8 hash_fn; + u8 flags; + __be32 rss_key[10]; + __be32 base_qpn_udp; +}; + struct mlx4_qp_path { u8 fl; u8 reserved1[2]; -- cgit v1.2.3 From 559a9f1d354b577af28f84181751820ff7d29feb Mon Sep 17 00:00:00 2001 From: Oren Duer Date: Sat, 26 Nov 2011 19:55:15 +0000 Subject: net/mlx4_en: fix WOL handlers were always looking at port2 capability bit There are 2 capability bits for WOL, one for each port. WOL handlers were looking only on the second bit, regardless of the port. Signed-off-by: Oren Duer Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 84b0b1848f17..ca2c39771c38 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -77,7 +77,8 @@ enum { MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30, MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32, MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34, - MLX4_DEV_CAP_FLAG_WOL = 1LL << 38, + MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37, + MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38, MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40, MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, -- cgit v1.2.3 From 60d6fe99e4a507f77b63c090eb8aacb67e21687a Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sat, 26 Nov 2011 19:55:19 +0000 Subject: net/mlx4_en: adding loopback support Device must be in promiscuous mode or DMAC must be same as the host MAC, or else packet will be dropped by the HW rx filtering. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6562ff6aa9d6..bee8fa231276 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -210,6 +210,7 @@ struct mlx4_wqe_ctrl_seg { * [4] IP checksum * [3:2] C (generate completion queue entry) * [1] SE (solicited event) + * [0] FL (force loopback) */ __be32 srcrb_flags; /* -- cgit v1.2.3 From 1d9d9213d526f2f4ef9a3aa198a29a0b1a670fa1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 18 Nov 2011 14:20:43 +0100 Subject: wireless: Add NoAck per tid support This patch contains the configuration changes in nl80211/cfg80211. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 97bfebfcce90..1fc04853ec95 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -538,6 +538,9 @@ * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME * messages. Note that per PHY only one application may register. * + * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether + * No Acknowledgement Policy should be applied. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -675,6 +678,8 @@ enum nl80211_commands { NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + NL80211_CMD_SET_NOACK_MAP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1185,6 +1190,9 @@ enum nl80211_commands { * abides to when initiating radiation on DFS channels. A country maps * to one DFS region. * + * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of + * up to 16 TIDs. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1428,6 +1436,8 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_HT, NL80211_ATTR_HT_CAPABILITY_MASK, + NL80211_ATTR_NOACK_MAP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From dca7e9430cb3e492437a5ce891b8b3e315c147ca Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 24 Nov 2011 17:15:24 -0800 Subject: {nl,cfg,mac}80211: implement dot11MeshHWMPperrMinInterval As per 802.11mb 13.9.11.3 Signed-off-by: Thomas Pedersen Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 1fc04853ec95..f51e3bf93a96 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2094,6 +2094,10 @@ enum nl80211_mntr_flags { * access to a broader network beyond the MBSS. This is done via Root * Announcement frames. * + * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which a mesh STA can send only one Action frame containing a + * PERR element. + * * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use @@ -2117,6 +2121,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_ELEMENT_TTL, NL80211_MESHCONF_HWMP_RANN_INTERVAL, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From f7bc83d87d242917ca0ee041ed509f57f361dd56 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Nov 2011 21:20:32 +0100 Subject: PM: Update comments describing device power management callbacks The comments describing device power management callbacks in include/pm.h are outdated and somewhat confusing, so make them reflect the reality more accurately. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 229 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 5c4c8b18c8b7..3f3ed83a9aa5 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -54,118 +54,145 @@ typedef struct pm_message { /** * struct dev_pm_ops - device PM callbacks * - * Several driver power state transitions are externally visible, affecting + * Several device power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) * interrupts, wakeups, DMA, and other hardware state. There may also be - * internal transitions to various low power modes, which are transparent + * internal transitions to various low-power modes which are transparent * to the rest of the driver stack (such as a driver that's ON gating off * clocks which are not in active use). * - * The externally visible transitions are handled with the help of the following - * callbacks included in this structure: - * - * @prepare: Prepare the device for the upcoming transition, but do NOT change - * its hardware state. Prevent new children of the device from being - * registered after @prepare() returns (the driver's subsystem and - * generally the rest of the kernel is supposed to prevent new calls to the - * probe method from being made too once @prepare() has succeeded). If - * @prepare() detects a situation it cannot handle (e.g. registration of a - * child already in progress), it may return -EAGAIN, so that the PM core - * can execute it once again (e.g. after the new child has been registered) - * to recover from the race condition. This method is executed for all - * kinds of suspend transitions and is followed by one of the suspend - * callbacks: @suspend(), @freeze(), or @poweroff(). - * The PM core executes @prepare() for all devices before starting to - * execute suspend callbacks for any of them, so drivers may assume all of - * the other devices to be present and functional while @prepare() is being - * executed. In particular, it is safe to make GFP_KERNEL memory - * allocations from within @prepare(). However, drivers may NOT assume - * anything about the availability of the user space at that time and it - * is not correct to request firmware from within @prepare() (it's too - * late to do that). [To work around this limitation, drivers may - * register suspend and hibernation notifiers that are executed before the - * freezing of tasks.] + * The externally visible transitions are handled with the help of callbacks + * included in this structure in such a way that two levels of callbacks are + * involved. First, the PM core executes callbacks provided by PM domains, + * device types, classes and bus types. They are the subsystem-level callbacks + * supposed to execute callbacks provided by device drivers, although they may + * choose not to do that. If the driver callbacks are executed, they have to + * collaborate with the subsystem-level callbacks to achieve the goals + * appropriate for the given system transition, given transition phase and the + * subsystem the device belongs to. + * + * @prepare: The principal role of this callback is to prevent new children of + * the device from being registered after it has returned (the driver's + * subsystem and generally the rest of the kernel is supposed to prevent + * new calls to the probe method from being made too once @prepare() has + * succeeded). If @prepare() detects a situation it cannot handle (e.g. + * registration of a child already in progress), it may return -EAGAIN, so + * that the PM core can execute it once again (e.g. after a new child has + * been registered) to recover from the race condition. + * This method is executed for all kinds of suspend transitions and is + * followed by one of the suspend callbacks: @suspend(), @freeze(), or + * @poweroff(). The PM core executes subsystem-level @prepare() for all + * devices before starting to invoke suspend callbacks for any of them, so + * generally devices may be assumed to be functional or to respond to + * runtime resume requests while @prepare() is being executed. However, + * device drivers may NOT assume anything about the availability of user + * space at that time and it is NOT valid to request firmware from within + * @prepare() (it's too late to do that). It also is NOT valid to allocate + * substantial amounts of memory from @prepare() in the GFP_KERNEL mode. + * [To work around these limitations, drivers may register suspend and + * hibernation notifiers to be executed before the freezing of tasks.] * * @complete: Undo the changes made by @prepare(). This method is executed for * all kinds of resume transitions, following one of the resume callbacks: * @resume(), @thaw(), @restore(). Also called if the state transition - * fails before the driver's suspend callback (@suspend(), @freeze(), - * @poweroff()) can be executed (e.g. if the suspend callback fails for one + * fails before the driver's suspend callback: @suspend(), @freeze() or + * @poweroff(), can be executed (e.g. if the suspend callback fails for one * of the other devices that the PM core has unsuccessfully attempted to * suspend earlier). - * The PM core executes @complete() after it has executed the appropriate - * resume callback for all devices. + * The PM core executes subsystem-level @complete() after it has executed + * the appropriate resume callbacks for all devices. * * @suspend: Executed before putting the system into a sleep state in which the - * contents of main memory are preserved. Quiesce the device, put it into - * a low power state appropriate for the upcoming system state (such as - * PCI_D3hot), and enable wakeup events as appropriate. + * contents of main memory are preserved. The exact action to perform + * depends on the device's subsystem (PM domain, device type, class or bus + * type), but generally the device must be quiescent after subsystem-level + * @suspend() has returned, so that it doesn't do any I/O or DMA. + * Subsystem-level @suspend() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @resume: Executed after waking the system up from a sleep state in which the - * contents of main memory were preserved. Put the device into the - * appropriate state, according to the information saved in memory by the - * preceding @suspend(). The driver starts working again, responding to - * hardware events and software requests. The hardware may have gone - * through a power-off reset, or it may have maintained state from the - * previous suspend() which the driver may rely on while resuming. On most - * platforms, there are no restrictions on availability of resources like - * clocks during @resume(). + * contents of main memory were preserved. The exact action to perform + * depends on the device's subsystem, but generally the driver is expected + * to start working again, responding to hardware events and software + * requests (the device itself may be left in a low-power state, waiting + * for a runtime resume to occur). The state of the device at the time its + * driver's @resume() callback is run depends on the platform and subsystem + * the device belongs to. On most platforms, there are no restrictions on + * availability of resources like clocks during @resume(). + * Subsystem-level @resume() is executed for all devices after invoking + * subsystem-level @resume_noirq() for all of them. * * @freeze: Hibernation-specific, executed before creating a hibernation image. - * Quiesce operations so that a consistent image can be created, but do NOT - * otherwise put the device into a low power device state and do NOT emit - * system wakeup events. Save in main memory the device settings to be - * used by @restore() during the subsequent resume from hibernation or by - * the subsequent @thaw(), if the creation of the image or the restoration - * of main memory contents from it fails. + * Analogous to @suspend(), but it should not enable the device to signal + * wakeup events or change its power state. The majority of subsystems + * (with the notable exception of the PCI bus type) expect the driver-level + * @freeze() to save the device settings in memory to be used by @restore() + * during the subsequent resume from hibernation. + * Subsystem-level @freeze() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @thaw: Hibernation-specific, executed after creating a hibernation image OR - * if the creation of the image fails. Also executed after a failing + * if the creation of an image has failed. Also executed after a failing * attempt to restore the contents of main memory from such an image. * Undo the changes made by the preceding @freeze(), so the device can be * operated in the same way as immediately before the call to @freeze(). + * Subsystem-level @thaw() is executed for all devices after invoking + * subsystem-level @thaw_noirq() for all of them. It also may be executed + * directly after @freeze() in case of a transition error. * * @poweroff: Hibernation-specific, executed after saving a hibernation image. - * Quiesce the device, put it into a low power state appropriate for the - * upcoming system state (such as PCI_D3hot), and enable wakeup events as - * appropriate. + * Analogous to @suspend(), but it need not save the device's settings in + * memory. + * Subsystem-level @poweroff() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @restore: Hibernation-specific, executed after restoring the contents of main - * memory from a hibernation image. Driver starts working again, - * responding to hardware events and software requests. Drivers may NOT - * make ANY assumptions about the hardware state right prior to @restore(). - * On most platforms, there are no restrictions on availability of - * resources like clocks during @restore(). - * - * @suspend_noirq: Complete the operations of ->suspend() by carrying out any - * actions required for suspending the device that need interrupts to be - * disabled - * - * @resume_noirq: Prepare for the execution of ->resume() by carrying out any - * actions required for resuming the device that need interrupts to be - * disabled - * - * @freeze_noirq: Complete the operations of ->freeze() by carrying out any - * actions required for freezing the device that need interrupts to be - * disabled - * - * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any - * actions required for thawing the device that need interrupts to be - * disabled - * - * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any - * actions required for handling the device that need interrupts to be - * disabled - * - * @restore_noirq: Prepare for the execution of ->restore() by carrying out any - * actions required for restoring the operations of the device that need - * interrupts to be disabled + * memory from a hibernation image, analogous to @resume(). + * + * @suspend_noirq: Complete the actions started by @suspend(). Carry out any + * additional operations required for suspending the device that might be + * racing with its driver's interrupt handler, which is guaranteed not to + * run while @suspend_noirq() is being executed. + * It generally is expected that the device will be in a low-power state + * (appropriate for the target system sleep state) after subsystem-level + * @suspend_noirq() has returned successfully. If the device can generate + * system wakeup signals and is enabled to wake up the system, it should be + * configured to do so at that time. However, depending on the platform + * and device's subsystem, @suspend() may be allowed to put the device into + * the low-power state and configure it to generate wakeup signals, in + * which case it generally is not necessary to define @suspend_noirq(). + * + * @resume_noirq: Prepare for the execution of @resume() by carrying out any + * operations required for resuming the device that might be racing with + * its driver's interrupt handler, which is guaranteed not to run while + * @resume_noirq() is being executed. + * + * @freeze_noirq: Complete the actions started by @freeze(). Carry out any + * additional operations required for freezing the device that might be + * racing with its driver's interrupt handler, which is guaranteed not to + * run while @freeze_noirq() is being executed. + * The power state of the device should not be changed by either @freeze() + * or @freeze_noirq() and it should not be configured to signal system + * wakeup by any of these callbacks. + * + * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any + * operations required for thawing the device that might be racing with its + * driver's interrupt handler, which is guaranteed not to run while + * @thaw_noirq() is being executed. + * + * @poweroff_noirq: Complete the actions started by @poweroff(). Analogous to + * @suspend_noirq(), but it need not save the device's settings in memory. + * + * @restore_noirq: Prepare for the execution of @restore() by carrying out any + * operations required for thawing the device that might be racing with its + * driver's interrupt handler, which is guaranteed not to run while + * @restore_noirq() is being executed. Analogous to @resume_noirq(). * * All of the above callbacks, except for @complete(), return error codes. * However, the error codes returned by the resume operations, @resume(), - * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do + * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do * not cause the PM core to abort the resume transition during which they are - * returned. The error codes returned in that cases are only printed by the PM + * returned. The error codes returned in those cases are only printed by the PM * core to the system logs for debugging purposes. Still, it is recommended * that drivers only return error codes from their resume methods in case of an * unrecoverable failure (i.e. when the device being handled refuses to resume @@ -174,31 +201,43 @@ typedef struct pm_message { * their children. * * It is allowed to unregister devices while the above callbacks are being - * executed. However, it is not allowed to unregister a device from within any - * of its own callbacks. + * executed. However, a callback routine must NOT try to unregister the device + * it was called for, although it may unregister children of that device (for + * example, if it detects that a child was unplugged while the system was + * asleep). + * + * Refer to Documentation/power/devices.txt for more information about the role + * of the above callbacks in the system suspend process. * - * There also are the following callbacks related to run-time power management - * of devices: + * There also are callbacks related to runtime power management of devices. + * Again, these callbacks are executed by the PM core only for subsystems + * (PM domains, device types, classes and bus types) and the subsystem-level + * callbacks are supposed to invoke the driver callbacks. Moreover, the exact + * actions to be performed by a device driver's callbacks generally depend on + * the platform and subsystem the device belongs to. * * @runtime_suspend: Prepare the device for a condition in which it won't be * able to communicate with the CPU(s) and RAM due to power management. - * This need not mean that the device should be put into a low power state. + * This need not mean that the device should be put into a low-power state. * For example, if the device is behind a link which is about to be turned * off, the device may remain at full power. If the device does go to low - * power and is capable of generating run-time wake-up events, remote - * wake-up (i.e., a hardware mechanism allowing the device to request a - * change of its power state via a wake-up event, such as PCI PME) should - * be enabled for it. + * power and is capable of generating runtime wakeup events, remote wakeup + * (i.e., a hardware mechanism allowing the device to request a change of + * its power state via an interrupt) should be enabled for it. * * @runtime_resume: Put the device into the fully active state in response to a - * wake-up event generated by hardware or at the request of software. If - * necessary, put the device into the full power state and restore its + * wakeup event generated by hardware or at the request of software. If + * necessary, put the device into the full-power state and restore its * registers, so that it is fully operational. * - * @runtime_idle: Device appears to be inactive and it might be put into a low - * power state if all of the necessary conditions are satisfied. Check + * @runtime_idle: Device appears to be inactive and it might be put into a + * low-power state if all of the necessary conditions are satisfied. Check * these conditions and handle the device as appropriate, possibly queueing * a suspend request for it. The return value is ignored by the PM core. + * + * Refer to Documentation/power/runtime_pm.txt for more information about the + * role of the above callbacks in device runtime power management. + * */ struct dev_pm_ops { -- cgit v1.2.3 From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 27 Nov 2011 21:14:46 +0000 Subject: net: Fix corruption in /proc/*/net/dev_mcast I just hit this during my testing. Isn't there another bug lurking? BUG kmalloc-8: Redzone overwritten INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896 .__kmalloc+0x1e0/0x2d0 .__seq_open_private+0x30/0xa0 .seq_open_net+0x60/0xe0 .dev_mc_seq_open+0x4c/0x70 .proc_reg_open+0xd8/0x260 .__dentry_open.clone.11+0x2b8/0x400 .do_last+0xf4/0x950 .path_openat+0xf8/0x480 .do_filp_open+0x48/0xc0 .do_sys_open+0x140/0x250 syscall_exit+0x0/0x40 dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates sizeof(struct seq_net_private) of private data, whereas it expects sizeof(struct dev_iter_state): struct dev_iter_state { struct seq_net_private p; unsigned int pos; /* bucket << BUCKET_SPACE + offset */ }; Create dev_seq_open_ops and use it so we don't have to expose struct dev_iter_state. [ Problem added by commit f04565ddf52e4 (dev: use name hash for dev_seq_ops) -Eric ] Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cbeb5867cff7..a82ad4dd306a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2536,6 +2536,8 @@ extern void net_disable_timestamp(void); extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); extern void dev_seq_stop(struct seq_file *seq, void *v); +extern int dev_seq_open_ops(struct inode *inode, struct file *file, + const struct seq_operations *ops); #endif extern int netdev_class_create_file(struct class_attribute *class_attr); -- cgit v1.2.3 From 4f718a29fe4908c2cea782f751e9805319684e2b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Nov 2011 09:44:14 +0100 Subject: firmware: Sigma: Prevent out of bounds memory access The SigmaDSP firmware loader currently does not perform enough boundary size checks when processing the firmware. As a result it is possible that a malformed firmware can cause an out of bounds memory access. This patch adds checks which ensure that both the action header and the payload are completely inside the firmware data boundaries before processing them. Signed-off-by: Lars-Peter Clausen Acked-by: Mike Frysinger Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/sigma.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sigma.h b/include/linux/sigma.h index e2accb3164d8..9a138c2946bb 100644 --- a/include/linux/sigma.h +++ b/include/linux/sigma.h @@ -50,11 +50,6 @@ static inline u32 sigma_action_len(struct sigma_action *sa) return (sa->len_hi << 16) | sa->len; } -static inline size_t sigma_action_size(struct sigma_action *sa, u32 payload_len) -{ - return sizeof(*sa) + payload_len + (payload_len % 2); -} - extern int process_sigma_firmware(struct i2c_client *client, const char *name); #endif -- cgit v1.2.3 From bda63586bc5929e97288cdb371bb6456504867ed Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Nov 2011 09:44:16 +0100 Subject: firmware: Sigma: Fix endianess issues Currently the SigmaDSP firmware loader only works correctly on little-endian systems. Fix this by using the proper endianess conversion functions. Signed-off-by: Lars-Peter Clausen Acked-by: Mike Frysinger Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/sigma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sigma.h b/include/linux/sigma.h index 9a138c2946bb..d0de882c0d96 100644 --- a/include/linux/sigma.h +++ b/include/linux/sigma.h @@ -24,7 +24,7 @@ struct sigma_firmware { struct sigma_firmware_header { unsigned char magic[7]; u8 version; - u32 crc; + __le32 crc; }; enum { @@ -40,14 +40,14 @@ enum { struct sigma_action { u8 instr; u8 len_hi; - u16 len; - u16 addr; + __le16 len; + __be16 addr; unsigned char payload[]; }; static inline u32 sigma_action_len(struct sigma_action *sa) { - return (sa->len_hi << 16) | sa->len; + return (sa->len_hi << 16) | le16_to_cpu(sa->len); } extern int process_sigma_firmware(struct i2c_client *client, const char *name); -- cgit v1.2.3 From 75957ba36c05b979701e9ec64b37819adc12f830 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 28 Nov 2011 16:32:35 +0000 Subject: dql: Dynamic queue limits Implementation of dynamic queue limits (dql). This is a libary which allows a queue limit to be dynamically managed. The goal of dql is to set the queue limit, number of objects to the queue, to be minimized without allowing the queue to be starved. dql would be used with a queue which has these properties: 1) Objects are queued up to some limit which can be expressed as a count of objects. 2) Periodically a completion process executes which retires consumed objects. 3) Starvation occurs when limit has been reached, all queued data has actually been consumed but completion processing has not yet run, so queuing new data is blocked. 4) Minimizing the amount of queued data is desirable. A canonical example of such a queue would be a NIC HW transmit queue. The queue limit is dynamic, it will increase or decrease over time depending on the workload. The queue limit is recalculated each time completion processing is done. Increases occur when the queue is starved and can exponentially increase over successive intervals. Decreases occur when more data is being maintained in the queue than needed to prevent starvation. The number of extra objects, or "slack", is measured over successive intervals, and to avoid hysteresis the limit is only reduced by the miminum slack seen over a configurable time period. dql API provides routines to manage the queue: - dql_init is called to intialize the dql structure - dql_reset is called to reset dynamic values - dql_queued called when objects are being enqueued - dql_avail returns availability in the queue - dql_completed is called when objects have be consumed in the queue Configuration consists of: - max_limit, maximum limit - min_limit, minimum limit - slack_hold_time, time to measure instances of slack before reducing queue limit Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dynamic_queue_limits.h | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/linux/dynamic_queue_limits.h (limited to 'include/linux') diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h new file mode 100644 index 000000000000..5621547d631b --- /dev/null +++ b/include/linux/dynamic_queue_limits.h @@ -0,0 +1,97 @@ +/* + * Dynamic queue limits (dql) - Definitions + * + * Copyright (c) 2011, Tom Herbert + * + * This header file contains the definitions for dynamic queue limits (dql). + * dql would be used in conjunction with a producer/consumer type queue + * (possibly a HW queue). Such a queue would have these general properties: + * + * 1) Objects are queued up to some limit specified as number of objects. + * 2) Periodically a completion process executes which retires consumed + * objects. + * 3) Starvation occurs when limit has been reached, all queued data has + * actually been consumed, but completion processing has not yet run + * so queuing new data is blocked. + * 4) Minimizing the amount of queued data is desirable. + * + * The goal of dql is to calculate the limit as the minimum number of objects + * needed to prevent starvation. + * + * The primary functions of dql are: + * dql_queued - called when objects are enqueued to record number of objects + * dql_avail - returns how many objects are available to be queued based + * on the object limit and how many objects are already enqueued + * dql_completed - called at completion time to indicate how many objects + * were retired from the queue + * + * The dql implementation does not implement any locking for the dql data + * structures, the higher layer should provide this. dql_queued should + * be serialized to prevent concurrent execution of the function; this + * is also true for dql_completed. However, dql_queued and dlq_completed can + * be executed concurrently (i.e. they can be protected by different locks). + */ + +#ifndef _LINUX_DQL_H +#define _LINUX_DQL_H + +#ifdef __KERNEL__ + +struct dql { + /* Fields accessed in enqueue path (dql_queued) */ + unsigned int num_queued; /* Total ever queued */ + unsigned int adj_limit; /* limit + num_completed */ + unsigned int last_obj_cnt; /* Count at last queuing */ + + /* Fields accessed only by completion path (dql_completed) */ + + unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ + unsigned int num_completed; /* Total ever completed */ + + unsigned int prev_ovlimit; /* Previous over limit */ + unsigned int prev_num_queued; /* Previous queue total */ + unsigned int prev_last_obj_cnt; /* Previous queuing cnt */ + + unsigned int lowest_slack; /* Lowest slack found */ + unsigned long slack_start_time; /* Time slacks seen */ + + /* Configuration */ + unsigned int max_limit; /* Max limit */ + unsigned int min_limit; /* Minimum limit */ + unsigned int slack_hold_time; /* Time to measure slack */ +}; + +/* Set some static maximums */ +#define DQL_MAX_OBJECT (UINT_MAX / 16) +#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) + +/* + * Record number of objects queued. Assumes that caller has already checked + * availability in the queue with dql_avail. + */ +static inline void dql_queued(struct dql *dql, unsigned int count) +{ + BUG_ON(count > DQL_MAX_OBJECT); + + dql->num_queued += count; + dql->last_obj_cnt = count; +} + +/* Returns how many objects can be queued, < 0 indicates over limit. */ +static inline int dql_avail(const struct dql *dql) +{ + return dql->adj_limit - dql->num_queued; +} + +/* Record number of completed objects and recalculate the limit. */ +void dql_completed(struct dql *dql, unsigned int count); + +/* Reset dql state */ +void dql_reset(struct dql *dql); + +/* Initialize dql state */ +int dql_init(struct dql *dql, unsigned hold_time); + +#endif /* _KERNEL_ */ + +#endif /* _LINUX_DQL_H */ -- cgit v1.2.3 From 7346649826382b769cfadf4a2fe8a84d060c55e9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 28 Nov 2011 16:32:44 +0000 Subject: net: Add queue state xoff flag for stack Create separate queue state flags so that either the stack or drivers can turn on XOFF. Added a set of functions used in the stack to determine if a queue is really stopped (either by stack or driver) Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ac9a4b9344ca..d19f93265cac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -517,11 +517,23 @@ static inline void napi_synchronize(const struct napi_struct *n) #endif enum netdev_queue_state_t { - __QUEUE_STATE_XOFF, + __QUEUE_STATE_DRV_XOFF, + __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, -#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF) | \ - (1 << __QUEUE_STATE_FROZEN)) +#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ + (1 << __QUEUE_STATE_STACK_XOFF)) +#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ + (1 << __QUEUE_STATE_FROZEN)) }; +/* + * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The + * netif_tx_* functions below are used to manipulate this flag. The + * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit + * queue independently. The netif_xmit_*stopped functions below are called + * to check if the queue has been stopped by the driver or stack (either + * of the XOFF bits are set in the state). Drivers should not need to call + * netif_xmit*stopped functions, they should only be using netif_tx_*. + */ struct netdev_queue { /* @@ -1718,7 +1730,7 @@ extern void __netif_schedule(struct Qdisc *q); static inline void netif_schedule_queue(struct netdev_queue *txq) { - if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) + if (!(txq->state & QUEUE_STATE_ANY_XOFF)) __netif_schedule(txq->qdisc); } @@ -1732,7 +1744,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev) static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) { - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); + clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** @@ -1764,7 +1776,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) return; } #endif - if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) __netif_schedule(dev_queue->qdisc); } @@ -1796,7 +1808,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); return; } - set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); + set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** @@ -1823,7 +1835,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev) static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { - return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); + return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** @@ -1837,9 +1849,16 @@ static inline int netif_queue_stopped(const struct net_device *dev) return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } -static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue) +static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue) { - return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN; + return dev_queue->state & QUEUE_STATE_ANY_XOFF; +} + +static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) +{ + return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; +} + } /** @@ -1926,7 +1945,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) __netif_schedule(txq->qdisc); } -- cgit v1.2.3 From c5d67bd78c5dc540e3461c36fb3d389fbe0de4c3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 28 Nov 2011 16:32:52 +0000 Subject: net: Add netdev interfaces for recording sends/comp Add interfaces for drivers to call for recording number of packets and bytes at send time and transmit completion. Also, added a function to "reset" a queue. These will be used by Byte Queue Limits. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d19f93265cac..9b24cc7a54d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1859,6 +1859,34 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } +static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, + unsigned int bytes) +{ +} + +static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) +{ + netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); +} + +static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, + unsigned pkts, unsigned bytes) +{ +} + +static inline void netdev_completed_queue(struct net_device *dev, + unsigned pkts, unsigned bytes) +{ + netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); +} + +static inline void netdev_tx_reset_queue(struct netdev_queue *q) +{ +} + +static inline void netdev_reset_queue(struct net_device *dev_queue) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); } /** -- cgit v1.2.3 From 114cf5802165ee93e3ab461c9c505cd94a08b800 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 28 Nov 2011 16:33:09 +0000 Subject: bql: Byte queue limits Networking stack support for byte queue limits, uses dynamic queue limits library. Byte queue limits are maintained per transmit queue, and a dql structure has been added to netdev_queue structure for this purpose. Configuration of bql is in the tx- sysfs directory for the queue under the byte_queue_limits directory. Configuration includes: limit_min, bql minimum limit limit_max, bql maximum limit hold_time, bql slack hold time Also under the directory are: limit, current byte limit inflight, current number of bytes on the queue Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9b24cc7a54d1..97edb3215a5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -541,7 +542,6 @@ struct netdev_queue { */ struct net_device *dev; struct Qdisc *qdisc; - unsigned long state; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -564,6 +564,12 @@ struct netdev_queue { * (/sys/class/net/DEV/Q/trans_timeout) */ unsigned long trans_timeout; + + unsigned long state; + +#ifdef CONFIG_BQL + struct dql dql; +#endif } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1862,6 +1868,15 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { +#ifdef CONFIG_BQL + dql_queued(&dev_queue->dql, bytes); + if (unlikely(dql_avail(&dev_queue->dql) < 0)) { + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); + if (unlikely(dql_avail(&dev_queue->dql) >= 0)) + clear_bit(__QUEUE_STATE_STACK_XOFF, + &dev_queue->state); + } +#endif } static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) @@ -1872,6 +1887,18 @@ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned pkts, unsigned bytes) { +#ifdef CONFIG_BQL + if (likely(bytes)) { + dql_completed(&dev_queue->dql, bytes); + if (unlikely(test_bit(__QUEUE_STATE_STACK_XOFF, + &dev_queue->state) && + dql_avail(&dev_queue->dql) >= 0)) { + if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, + &dev_queue->state)) + netif_schedule_queue(dev_queue); + } + } +#endif } static inline void netdev_completed_queue(struct net_device *dev, @@ -1882,6 +1909,9 @@ static inline void netdev_completed_queue(struct net_device *dev, static inline void netdev_tx_reset_queue(struct netdev_queue *q) { +#ifdef CONFIG_BQL + dql_reset(&q->dql); +#endif } static inline void netdev_reset_queue(struct net_device *dev_queue) -- cgit v1.2.3 From 018690d33ecf4aa1eb1415e38c40e2b0b6c7808e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 29 Nov 2011 20:10:36 +0000 Subject: regmap: Allow regmap_update_bits() users to detect changes Some users of regmap_update_bits() would like to be able to tell their users if they actually did an update so provide a variant which also returns a flag indicating if an update took place. We could return a tristate in the return value of regmap_update_bits() but this makes the API more cumbersome to use and doesn't fit with the general zero for success idiom we have. Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 81dfe0acb20c..a83e4a097abd 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -138,6 +138,9 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); int regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); +int regmap_update_bits_check(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change); int regcache_sync(struct regmap *map); void regcache_cache_only(struct regmap *map, bool enable); -- cgit v1.2.3 From 596b9b68ef118f7409afbc78487263e08ef96261 Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 25 Jul 2011 00:01:25 +0000 Subject: neigh: Add infrastructure for allocating device neigh privates. netdev->neigh_priv_len records the private area length. This will trigger for neigh_table objects which set tbl->entry_size to zero, and the first instances of this will be forthcoming. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97edb3215a5a..5462c2cd5eab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1080,6 +1080,7 @@ struct net_device { unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_assign_type; /* hw address assignment type */ unsigned char addr_len; /* hardware address length */ + unsigned char neigh_priv_len; unsigned short dev_id; /* for shared network cards */ spinlock_t addr_list_lock; -- cgit v1.2.3 From da6a8fa0275e2178c44a875374cae80d057538d1 Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 25 Jul 2011 00:01:38 +0000 Subject: neigh: Add device constructor/destructor capability. If the neigh entry has device private state, it will need constructor/destructor ops. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5462c2cd5eab..1c4ddb37f2b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,6 +974,8 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); + int (*ndo_neigh_construct)(struct neighbour *n); + int (*ndo_neigh_destroy)(struct neighbour *n); }; /* -- cgit v1.2.3 From 7bc0f28c7a0cd19f40e5a6e4d0a117db9a4e4cd5 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Wed, 30 Nov 2011 12:20:26 +0000 Subject: netem: rate extension Currently netem is not in the ability to emulate channel bandwidth. Only static delay (and optional random jitter) can be configured. To emulate the channel rate the token bucket filter (sch_tbf) can be used. But TBF has some major emulation flaws. The buffer (token bucket depth/rate) cannot be 0. Also the idea behind TBF is that the credit (token in buckets) fills if no packet is transmitted. So that there is always a "positive" credit for new packets. In real life this behavior contradicts the law of nature where nothing can travel faster as speed of light. E.g.: on an emulated 1000 byte/s link a small IPv4/TCP SYN packet with ~50 byte require ~0.05 seconds - not 0 seconds. Netem is an excellent place to implement a rate limiting feature: static delay is already implemented, tfifo already has time information and the user can skip TBF configuration completely. This patch implement rate feature which can be configured via tc. e.g: tc qdisc add dev eth0 root netem rate 10kbit To emulate a link of 5000byte/s and add an additional static delay of 10ms: tc qdisc add dev eth0 root netem delay 10ms rate 5KBps Note: similar to TBF the rate extension is bounded to the kernel timing system. Depending on the architecture timer granularity, higher rates (e.g. 10mbit/s and higher) tend to transmission bursts. Also note: further queues living in network adaptors; see ethtool(8). Signed-off-by: Hagen Paul Pfeifer Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 7281d5acf2f9..fb556dc594d3 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -465,6 +465,7 @@ enum { TCA_NETEM_REORDER, TCA_NETEM_CORRUPT, TCA_NETEM_LOSS, + TCA_NETEM_RATE, __TCA_NETEM_MAX, }; @@ -495,6 +496,10 @@ struct tc_netem_corrupt { __u32 correlation; }; +struct tc_netem_rate { + __u32 rate; /* byte/s */ +}; + enum { NETEM_LOSS_UNSPEC, NETEM_LOSS_GI, /* General Intuitive - 4 state model */ -- cgit v1.2.3 From 53e4acea0e819a6a8513e10a0773f2259ede0481 Mon Sep 17 00:00:00 2001 From: Chris Blair Date: Tue, 8 Nov 2011 08:54:46 +0000 Subject: spi/pl022: add support for pm_runtime autosuspend Adds support for configuring the spi bus to use autosuspend for runtime power management. This can reduce the latency in starting an spi transfer by not suspending the device immediately following completion of a transfer. If another transfer then takes place before the autosuspend timeout, the call to resume the device can return immediately rather than needing to risk sleeping in order to resume the device. Reviewed-by: Viresh Kumar Signed-off-by: Chris Blair Signed-off-by: Linus Walleij --- include/linux/amba/pl022.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index 4ce98f54186b..572f637299c9 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -238,6 +238,9 @@ struct dma_chan; * @enable_dma: if true enables DMA driven transfers. * @dma_rx_param: parameter to locate an RX DMA channel. * @dma_tx_param: parameter to locate a TX DMA channel. + * @autosuspend_delay: delay in ms following transfer completion before the + * runtime power management system suspends the device. A setting of 0 + * indicates no delay and the device will be suspended immediately. */ struct pl022_ssp_controller { u16 bus_id; @@ -246,6 +249,7 @@ struct pl022_ssp_controller { bool (*dma_filter)(struct dma_chan *chan, void *filter_param); void *dma_rx_param; void *dma_tx_param; + int autosuspend_delay; }; /** -- cgit v1.2.3 From 2a367c3a82557cd11a04949ef2160658987fa772 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Wed, 30 Nov 2011 23:41:18 +0000 Subject: can: cc770: add driver core for the Bosch CC770 and Intel AN82527 Signed-off-by: Wolfgang Grandegger Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- include/linux/can/platform/cc770.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/can/platform/cc770.h (limited to 'include/linux') diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h new file mode 100644 index 000000000000..7702641f87ee --- /dev/null +++ b/include/linux/can/platform/cc770.h @@ -0,0 +1,33 @@ +#ifndef _CAN_PLATFORM_CC770_H_ +#define _CAN_PLATFORM_CC770_H_ + +/* CPU Interface Register (0x02) */ +#define CPUIF_CEN 0x01 /* Clock Out Enable */ +#define CPUIF_MUX 0x04 /* Multiplex */ +#define CPUIF_SLP 0x08 /* Sleep */ +#define CPUIF_PWD 0x10 /* Power Down Mode */ +#define CPUIF_DMC 0x20 /* Divide Memory Clock */ +#define CPUIF_DSC 0x40 /* Divide System Clock */ +#define CPUIF_RST 0x80 /* Hardware Reset Status */ + +/* Clock Out Register (0x1f) */ +#define CLKOUT_CD_MASK 0x0f /* Clock Divider mask */ +#define CLKOUT_SL_MASK 0x30 /* Slew Rate mask */ +#define CLKOUT_SL_SHIFT 4 + +/* Bus Configuration Register (0x2f) */ +#define BUSCFG_DR0 0x01 /* Disconnect RX0 Input / Select RX input */ +#define BUSCFG_DR1 0x02 /* Disconnect RX1 Input / Silent mode */ +#define BUSCFG_DT1 0x08 /* Disconnect TX1 Output */ +#define BUSCFG_POL 0x20 /* Polarity dominant or recessive */ +#define BUSCFG_CBY 0x40 /* Input Comparator Bypass */ + +struct cc770_platform_data { + u32 osc_freq; /* CAN bus oscillator frequency in Hz */ + + u8 cir; /* CPU Interface Register */ + u8 cor; /* Clock Out Register */ + u8 bcr; /* Bus Configuration Register */ +}; + +#endif /* !_CAN_PLATFORM_CC770_H_ */ -- cgit v1.2.3 From 65698610f58153eb7621be3fb4f57ca318b19c60 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 14:16:04 -0500 Subject: net: Make ndo_neigh_destroy return void. The return value isn't used. Suggested by Ben Hucthings. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c4ddb37f2b5..21440e31fdab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -975,7 +975,7 @@ struct net_device_ops { int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); int (*ndo_neigh_construct)(struct neighbour *n); - int (*ndo_neigh_destroy)(struct neighbour *n); + void (*ndo_neigh_destroy)(struct neighbour *n); }; /* -- cgit v1.2.3 From 00dc9ad18d707f36b2fb4af98fd2cf0548d2b258 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:01:31 +0100 Subject: PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 ++ include/linux/pm_qos.h | 3 +++ include/linux/pm_runtime.h | 5 +++++ 3 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83a9aa5..a7676efa6831 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -521,6 +521,8 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + ktime_t suspend_time; + s64 max_time_suspended_ns; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ struct pm_qos_constraints *constraints; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea302a80..775a3236343d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -78,6 +78,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); +s32 __dev_pm_qos_read_value(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); @@ -119,6 +120,8 @@ static inline int pm_qos_request_active(struct pm_qos_request *req) static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) { return 0; } +static inline s32 __dev_pm_qos_read_value(struct device *dev) + { return 0; } static inline s32 dev_pm_qos_read_value(struct device *dev) { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d3085e72a0ee..609daae7a014 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -45,6 +45,8 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); +extern void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns); static inline bool pm_children_suspended(struct device *dev) { @@ -148,6 +150,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } +static inline void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns) {} + #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) -- cgit v1.2.3 From d5e4cbfe2049fca375cb19c4bc0cf676e8b4a88a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:36 +0100 Subject: PM / Domains: Make it possible to use per-device domain callbacks The current generic PM domains code requires that the same .stop(), .start() and .active_wakeup() device callback routines be used for all devices in the given domain, which is inflexible and may not cover some specific use cases. For this reason, make it possible to use device specific .start()/.stop() and .active_wakeup() callback routines by adding corresponding callback pointers to struct generic_pm_domain_data. Add a new helper routine, pm_genpd_register_callbacks(), that can be used to populate the new per-device callback pointers. Modify the shmobile's power domains code to allow drivers to add their own code to be run during the device stop and start operations with the help of the new callback pointers. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- include/linux/pm_domain.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 65633e5a2bc0..8949d2d202ae 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -23,6 +23,12 @@ struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); }; +struct gpd_dev_ops { + int (*start)(struct device *dev); + int (*stop)(struct device *dev); + bool (*active_wakeup)(struct device *dev); +}; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -45,9 +51,7 @@ struct generic_pm_domain { bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); - int (*start_device)(struct device *dev); - int (*stop_device)(struct device *dev); - bool (*active_wakeup)(struct device *dev); + struct gpd_dev_ops dev_ops; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -64,6 +68,7 @@ struct gpd_link { struct generic_pm_domain_data { struct pm_domain_data base; + struct gpd_dev_ops ops; bool need_restore; }; @@ -73,6 +78,11 @@ static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data * } #ifdef CONFIG_PM_GENERIC_DOMAINS +static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) +{ + return to_gpd_data(dev->power.subsys_data->domain_data); +} + extern int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, @@ -81,6 +91,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); +extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); +extern int pm_genpd_remove_callbacks(struct device *dev); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); @@ -105,6 +117,15 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } +static inline int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops) +{ + return -ENOSYS; +} +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return -ENOSYS; +} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) {} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) -- cgit v1.2.3 From ecf00475f229fcf06362412ad2d15a3267e354a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:44 +0100 Subject: PM / Domains: Introduce "save/restore state" device callbacks The current PM domains code uses device drivers' .runtime_suspend() and .runtime_resume() callbacks as the "save device state" and "restore device state" operations, which may not be appropriate in general, because it forces drivers to assume that they always will be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. It also may be located in a PM domain that's not handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, introduce new device callbacks, .save_state() and .restore_state(), that can be supplied by the drivers in addition to their "standard" runtime PM callbacks. This will allow the drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default .save_state() and .restore_state() callback routines for PM domains that will execute a device driver's .runtime_suspend() and .runtime_resume() callbacks, respectively, for the given device if the driver doesn't provide its own implementations of .save_state() and .restore_state(). Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8949d2d202ae..731080dad250 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -26,6 +26,8 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); + int (*save_state)(struct device *dev); + int (*restore_state)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From d23b9b00cdde5c93b914a172cecd57d5625fcd04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:51 +0100 Subject: PM / Domains: Rework system suspend callback routines (v2) The current generic PM domains code attempts to use the generic system suspend operations along with the domains' device stop/start routines, which requires device drivers to assume that their system suspend/resume (and hibernation/restore) callbacks will always be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. Also, the domain the hardware belongs to may not be handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, add new device callbacks, .suspend(), .suspend_late(), .resume_early(), .resume(), .freeze(), .freeze_late(), .thaw_early(), and .thaw(), that can be supplied by the drivers in addition to their "standard" system suspend and hibernation callbacks. These new callbacks, if defined, will be used by the generic PM domains code for the handling of system suspend and hibernation instead of the "standard" ones. This will allow drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default implementations of the new callbacks for PM domains that will execute pm_generic_suspend(), pm_generic_suspend_noirq(), pm_generic_resume_noirq(), pm_generic_resume(), pm_generic_freeze(), pm_generic_freeze_noirq(), pm_generic_thaw_noirq(), and pm_generic_thaw(), respectively, for the given device if its driver doesn't define those callbacks. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 731080dad250..10a197dce07e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -28,6 +28,14 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); + int (*suspend)(struct device *dev); + int (*suspend_late)(struct device *dev); + int (*resume_early)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*freeze_late)(struct device *dev); + int (*thaw_early)(struct device *dev); + int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From b02c999ac325e977585abeb4caf6e0a2ee21e30b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:05 +0100 Subject: PM / Domains: Add device stop governor function (v4) Add a function deciding whether or not devices should be stopped in pm_genpd_runtime_suspend() depending on their PM QoS constraints and stop/start timing values. Make it possible to add information used by this function to device objects. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- include/linux/pm_domain.h | 63 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 10a197dce07e..f6745c213a57 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -21,6 +21,7 @@ enum gpd_status { struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); + bool (*stop_ok)(struct device *dev); }; struct gpd_dev_ops { @@ -76,9 +77,16 @@ struct gpd_link { struct list_head slave_node; }; +struct gpd_timing_data { + s64 stop_latency_ns; + s64 start_latency_ns; + s64 break_even_ns; +}; + struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; + struct gpd_timing_data td; bool need_restore; }; @@ -93,20 +101,48 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern int pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev); +extern struct dev_power_governor simple_qos_governor; + +extern struct generic_pm_domain *dev_to_genpd(struct device *dev); +extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td); + +static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev) +{ + return __pm_genpd_add_device(genpd, dev, NULL); +} + extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); -extern int pm_genpd_remove_callbacks(struct device *dev); +extern int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops, + struct gpd_timing_data *td); +extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); + extern int pm_genpd_poweron(struct generic_pm_domain *genpd); + +extern bool default_stop_ok(struct device *dev); + #else + +static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) +{ + return ERR_PTR(-ENOSYS); +} +static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td) +{ + return -ENOSYS; +} static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { @@ -128,22 +164,33 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return -ENOSYS; } static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops) + struct gpd_dev_ops *ops, + struct gpd_timing_data *td) { return -ENOSYS; } -static inline int pm_genpd_remove_callbacks(struct device *dev) +static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { return -ENOSYS; } -static inline void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) {} +static inline void pm_genpd_init(struct generic_pm_domain *genpd, bool is_off) +{ +} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } +static inline bool default_stop_ok(struct device *dev) +{ + return false; +} #endif +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return __pm_genpd_remove_callbacks(dev, true); +} + #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v1.2.3 From 221e9b58380abdd6c05e11b4538597e2586ee141 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:10 +0100 Subject: PM / Domains: Add default power off governor function (v4) Add a function deciding whether or not a given PM domain should be powered off on the basis of the PM QoS constraints of devices belonging to it and their PM QoS timing data. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f6745c213a57..cc1a2450ff7b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,8 +61,13 @@ struct generic_pm_domain { bool suspend_power_off; /* Power status before system suspend */ bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); + s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); + s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; + s64 break_even_ns; /* Power break even for the entire domain. */ + s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ + ktime_t power_off_time; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -80,6 +85,8 @@ struct gpd_link { struct gpd_timing_data { s64 stop_latency_ns; s64 start_latency_ns; + s64 save_state_latency_ns; + s64 restore_state_latency_ns; s64 break_even_ns; }; -- cgit v1.2.3 From 4f042cdad40e1566a53b7ae85e72b6945a4b0fde Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 1 Dec 2011 00:05:31 +0100 Subject: PM / Domains: fix compilation failure for CONFIG_PM_GENERIC_DOMAINS unset Fix the following compalitaion breakage: In file included from linux/drivers/sh/pm_runtime.c:15: linux/include/linux/pm_domain.h: In function 'dev_to_genpd': linux/include/linux/pm_domain.h:142: error: implicit declaration of function 'ERR_PTR' linux/include/linux/pm_domain.h:142: warning: return makes pointer from integer without a cast In file included from linux/include/linux/sh_clk.h:10, from linux/drivers/sh/pm_runtime.c:19: linux/include/linux/err.h: At top level: linux/include/linux/err.h:22: error: conflicting types for 'ERR_PTR' linux/include/linux/pm_domain.h:142: note: previous implicit declaration of 'ERR_PTR' was here make[3]: *** [drivers/sh/pm_runtime.o] Error 1 Reported-by: Nobuhiro Iwamatsu Signed-off-by: Guennadi Liakhovetski Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index cc1a2450ff7b..fbb81bc5065a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -10,6 +10,7 @@ #define _LINUX_PM_DOMAIN_H #include +#include enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ -- cgit v1.2.3 From 42b2aa86c6670347a2a07e6d7af0e0ecc8fdbff9 Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Mon, 28 Nov 2011 20:31:00 -0800 Subject: treewide: Fix typos in various parts of the kernel, and fix some comments. The below patch fixes some typos in various parts of the kernel, as well as fixes some comments. Please let me know if I missed anything, and I will try to get it changed and resent. Signed-off-by: Justin P. Mattock Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- include/linux/wanrouter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index e0aa39612eba..3157cc1fada6 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -309,7 +309,7 @@ typedef struct wandev_conf #define WANOPT_EVEN 2 /* CHDLC Protocol Options */ -/* DF Commmented out for now. +/* DF Commented out for now. #define WANOPT_CHDLC_NO_DCD IGNORE_DCD_FOR_LINK_STAT #define WANOPT_CHDLC_NO_CTS IGNORE_CTS_FOR_LINK_STAT -- cgit v1.2.3 From 86b1309c7e411b7c25dc0dc7a092582a4d291044 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 10 Nov 2011 19:14:51 -0800 Subject: genetlink: Add lockdep_genl_is_held(). Open vSwitch uses genl_mutex locking to protect datapath data-structures like flow-table, flow-actions. Following patch adds lockdep_genl_is_held() which is used for rcu annotation to prove locking. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- include/linux/genetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 61549b26ad6f..59311adfb0e0 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -85,6 +85,9 @@ enum { /* All generic netlink requests are serialized by a global lock. */ extern void genl_lock(void); extern void genl_unlock(void); +#ifdef CONFIG_PROVE_LOCKING +extern int lockdep_genl_is_held(void); +#endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From b4e16611c4e1cd98765269c8fdaf43f96baa57b1 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Nov 2011 16:21:37 -0800 Subject: genetlink: Add rcu_dereference_genl and genl_dereference. This adds rcu_dereference_genl and genl_dereference, which are genl variants of the RTNL functions to enforce proper locking with lockdep and sparse. Signed-off-by: Jesse Gross --- include/linux/genetlink.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 59311adfb0e0..73c28dea10ae 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -89,6 +89,27 @@ extern void genl_unlock(void); extern int lockdep_genl_is_held(void); #endif +/** + * rcu_dereference_genl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference() + */ +#define rcu_dereference_genl(p) \ + rcu_dereference_check(p, lockdep_genl_is_held()) + +/** + * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex + * @p: The pointer to read, prior to dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * caller holds genl mutex. + */ +#define genl_dereference(p) \ + rcu_dereference_protected(p, lockdep_genl_is_held()) + #endif /* __KERNEL__ */ #endif /* __LINUX_GENERIC_NETLINK_H */ -- cgit v1.2.3 From 396cf9430505cfba529a2f2a037d782719fa5844 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 18 Nov 2011 13:15:54 -0800 Subject: vlan: Move vlan_set_encap_proto() to vlan header file Open vSwitch needs this function for vlan handling. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- include/linux/if_vlan.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 12d5543b14f2..070ac50c1d2d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -310,6 +310,40 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) return protocol; } + +static inline void vlan_set_encap_proto(struct sk_buff *skb, + struct vlan_hdr *vhdr) +{ + __be16 proto; + unsigned char *rawp; + + /* + * Was a VLAN packet, grab the encapsulated protocol, which the layer + * three protocols care about. + */ + + proto = vhdr->h_vlan_encapsulated_proto; + if (ntohs(proto) >= 1536) { + skb->protocol = proto; + return; + } + + rawp = skb->data; + if (*(unsigned short *) rawp == 0xFFFF) + /* + * This is a magic hack to spot IPX packets. Older Novell + * breaks the protocol design and runs IPX over 802.3 without + * an 802.2 LLC layer. We look for FFFF which isn't a used + * 802.2 SSAP/DSAP. This won't work for fault tolerant netware + * but does for the rest. + */ + skb->protocol = htons(ETH_P_802_3); + else + /* + * Real 802.2 LLC + */ + skb->protocol = htons(ETH_P_802_2); +} #endif /* __KERNEL__ */ /* VLAN IOCTLs are found in sockios.h */ -- cgit v1.2.3 From ccb1352e76cff0524e7ccb2074826a092dd13016 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 25 Oct 2011 19:26:31 -0700 Subject: net: Add Open vSwitch kernel components. Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features expected in a traditional hardware switch, it enables fine-grained programmatic extension and flow-based control of the network. This control is useful in a wide variety of applications but is particularly important in multi-server virtualization deployments, which are often characterized by highly dynamic endpoints and the need to maintain logical abstractions for multiple tenants. The Open vSwitch datapath provides an in-kernel fast path for packet forwarding. It is complemented by a userspace daemon, ovs-vswitchd, which is able to accept configuration from a variety of sources and translate it into packet processing rules. See http://openvswitch.org for more information and userspace utilities. Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 452 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 452 insertions(+) create mode 100644 include/linux/openvswitch.h (limited to 'include/linux') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h new file mode 100644 index 000000000000..eb1efa54fe84 --- /dev/null +++ b/include/linux/openvswitch.h @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _LINUX_OPENVSWITCH_H +#define _LINUX_OPENVSWITCH_H 1 + +#include + +/** + * struct ovs_header - header for OVS Generic Netlink messages. + * @dp_ifindex: ifindex of local port for datapath (0 to make a request not + * specific to a datapath). + * + * Attributes following the header are specific to a particular OVS Generic + * Netlink family, but all of the OVS families use this header. + */ + +struct ovs_header { + int dp_ifindex; +}; + +/* Datapaths. */ + +#define OVS_DATAPATH_FAMILY "ovs_datapath" +#define OVS_DATAPATH_MCGROUP "ovs_datapath" +#define OVS_DATAPATH_VERSION 0x1 + +enum ovs_datapath_cmd { + OVS_DP_CMD_UNSPEC, + OVS_DP_CMD_NEW, + OVS_DP_CMD_DEL, + OVS_DP_CMD_GET, + OVS_DP_CMD_SET +}; + +/** + * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. + * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local + * port". This is the name of the network device whose dp_ifindex is given in + * the &struct ovs_header. Always present in notifications. Required in + * %OVS_DP_NEW requests. May be used as an alternative to specifying + * dp_ifindex in other requests (with a dp_ifindex of 0). + * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially + * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on + * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should + * not be sent. + * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the + * datapath. Always present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_DP_* commands. + */ +enum ovs_datapath_attr { + OVS_DP_ATTR_UNSPEC, + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + __OVS_DP_ATTR_MAX +}; + +#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) + +struct ovs_dp_stats { + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ + __u64 n_flows; /* Number of flows present */ +}; + +struct ovs_vport_stats { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ +}; + +/* Fixed logical ports. */ +#define OVSP_LOCAL ((__u16)0) + +/* Packet transfer. */ + +#define OVS_PACKET_FAMILY "ovs_packet" +#define OVS_PACKET_VERSION 0x1 + +enum ovs_packet_cmd { + OVS_PACKET_CMD_UNSPEC, + + /* Kernel-to-user notifications. */ + OVS_PACKET_CMD_MISS, /* Flow table miss. */ + OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ + + /* Userspace commands. */ + OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ +}; + +/** + * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. + * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire + * packet as received, from the start of the Ethernet header onward. For + * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by + * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is + * the flow key extracted from the packet as originally received. + * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key + * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows + * userspace to adapt its flow setup strategy by comparing its notion of the + * flow key against the kernel's. + * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used + * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_USERDATA attribute. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_PACKET_* commands. + */ +enum ovs_packet_attr { + OVS_PACKET_ATTR_UNSPEC, + OVS_PACKET_ATTR_PACKET, /* Packet data. */ + OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ + OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ + __OVS_PACKET_ATTR_MAX +}; + +#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) + +/* Virtual ports. */ + +#define OVS_VPORT_FAMILY "ovs_vport" +#define OVS_VPORT_MCGROUP "ovs_vport" +#define OVS_VPORT_VERSION 0x1 + +enum ovs_vport_cmd { + OVS_VPORT_CMD_UNSPEC, + OVS_VPORT_CMD_NEW, + OVS_VPORT_CMD_DEL, + OVS_VPORT_CMD_GET, + OVS_VPORT_CMD_SET +}; + +enum ovs_vport_type { + OVS_VPORT_TYPE_UNSPEC, + OVS_VPORT_TYPE_NETDEV, /* network device */ + OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ + __OVS_VPORT_TYPE_MAX +}; + +#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) + +/** + * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. + * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. + * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type + * of vport. + * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device + * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes + * plus a null terminator. + * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. + * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that + * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on + * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for + * packets sent or received through the vport. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_VPORT_* commands. + * + * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and + * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is + * optional; if not specified a free port number is automatically selected. + * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type + * of vport. + * and other attributes are ignored. + * + * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to + * look up the vport to operate on; otherwise dp_idx from the &struct + * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. + */ +enum ovs_vport_attr { + OVS_VPORT_ATTR_UNSPEC, + OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ + OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ + OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ + OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ + OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + __OVS_VPORT_ATTR_MAX +}; + +#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) + +/* Flows. */ + +#define OVS_FLOW_FAMILY "ovs_flow" +#define OVS_FLOW_MCGROUP "ovs_flow" +#define OVS_FLOW_VERSION 0x1 + +enum ovs_flow_cmd { + OVS_FLOW_CMD_UNSPEC, + OVS_FLOW_CMD_NEW, + OVS_FLOW_CMD_DEL, + OVS_FLOW_CMD_GET, + OVS_FLOW_CMD_SET +}; + +struct ovs_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ +}; + +enum ovs_key_attr { + OVS_KEY_ATTR_UNSPEC, + OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */ + OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ + OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ + OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ + OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ + OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ + OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ + OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ + OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ + OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ + OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ + OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ + OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ + OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ + __OVS_KEY_ATTR_MAX +}; + +#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) + +/** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. + * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. + * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. + * + * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct + * ovs_key_ipv6. + */ +enum ovs_frag_type { + OVS_FRAG_TYPE_NONE, + OVS_FRAG_TYPE_FIRST, + OVS_FRAG_TYPE_LATER, + __OVS_FRAG_TYPE_MAX +}; + +#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) + +struct ovs_key_ethernet { + __u8 eth_src[6]; + __u8 eth_dst[6]; +}; + +struct ovs_key_ipv4 { + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_proto; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_ipv6 { + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be32 ipv6_label; /* 20-bits in least-significant bits. */ + __u8 ipv6_proto; + __u8 ipv6_tclass; + __u8 ipv6_hlimit; + __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_tcp { + __be16 tcp_src; + __be16 tcp_dst; +}; + +struct ovs_key_udp { + __be16 udp_src; + __be16 udp_dst; +}; + +struct ovs_key_icmp { + __u8 icmp_type; + __u8 icmp_code; +}; + +struct ovs_key_icmpv6 { + __u8 icmpv6_type; + __u8 icmpv6_code; +}; + +struct ovs_key_arp { + __be32 arp_sip; + __be32 arp_tip; + __be16 arp_op; + __u8 arp_sha[6]; + __u8 arp_tha[6]; +}; + +struct ovs_key_nd { + __u32 nd_target[4]; + __u8 nd_sll[6]; + __u8 nd_tll[6]; +}; + +/** + * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. + * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow + * key. Always present in notifications. Required for all requests (except + * dumps). + * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying + * the actions to take for packets that match the key. Always present in + * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for + * %OVS_FLOW_CMD_SET requests. + * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this + * flow. Present in notifications if the stats would be nonzero. Ignored in + * requests. + * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the + * TCP flags seen on packets in this flow. Only present in notifications for + * TCP flows, and only if it would be nonzero. Ignored in requests. + * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on + * the system monotonic clock, at which a packet was last processed for this + * flow. Only present in notifications if a packet has been processed for this + * flow. Ignored in requests. + * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the + * last-used time, accumulated TCP flags, and statistics for this flow. + * Otherwise ignored in requests. Never present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_FLOW_* commands. + */ +enum ovs_flow_attr { + OVS_FLOW_ATTR_UNSPEC, + OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ + OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ + OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ + OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ + __OVS_FLOW_ATTR_MAX +}; + +#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) + +/** + * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. + * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with + * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets and intermediate values sample intermediate + * fractions of packets. + * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. + * Actions are passed as nested attributes. + * + * Executes the specified actions with the given probability on a per-packet + * basis. + */ +enum ovs_sample_attr { + OVS_SAMPLE_ATTR_UNSPEC, + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + __OVS_SAMPLE_ATTR_MAX, +}; + +#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) + +/** + * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. + * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION + * message should be sent. Required. + * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the + * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA, + */ +enum ovs_userspace_attr { + OVS_USERSPACE_ATTR_UNSPEC, + OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ + OVS_USERSPACE_ATTR_USERDATA, /* u64 optional user-specified cookie. */ + __OVS_USERSPACE_ATTR_MAX +}; + +#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) + +/** + * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. + * @vlan_tpid: Tag protocol identifier (TPID) to push. + * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set + * (but it will not be set in the 802.1Q header that is pushed). + * + * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID + * values are those that the kernel module also parses as 802.1Q headers, to + * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN + * from having surprising results. + */ +struct ovs_action_push_vlan { + __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ +}; + +/** + * enum ovs_action_attr - Action types. + * + * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested + * %OVS_USERSPACE_ATTR_* attributes. + * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The + * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its + * value. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the + * packet. + * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in + * the nested %OVS_SAMPLE_ATTR_* attributes. + * + * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all + * fields within a header are modifiable, e.g. the IPv4 protocol and fragment + * type may not be changed. + */ + +enum ovs_action_attr { + OVS_ACTION_ATTR_UNSPEC, + OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ + OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ + OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ + OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ + OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ + OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + __OVS_ACTION_ATTR_MAX +}; + +#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) + +#endif /* _LINUX_OPENVSWITCH_H */ -- cgit v1.2.3 From a67ba43d30bf8c1cfdc2615439455302d2408453 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 1 Dec 2011 12:54:31 -0500 Subject: asm-generic/unistd.h: support new process_vm_{readv,write} syscalls Also prototype the "compat" functions so they can be referenced from C code. Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 154bf5683015..66ed067fb729 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -552,5 +552,14 @@ extern ssize_t compat_rw_copy_check_uvector(int type, extern void __user *compat_alloc_user_space(unsigned long len); +asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); +asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3 From 117632e64d2a5f464e491fe221d7169a3814a77b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2011 21:39:53 +0000 Subject: tcp: take care of misalignments We discovered that TCP stack could retransmit misaligned skbs if a malicious peer acknowledged sub MSS frame. This currently can happen only if output interface is non SG enabled : If SG is enabled, tcp builds headless skbs (all payload is included in fragments), so the tcp trimming process only removes parts of skb fragments, header stay aligned. Some arches cant handle misalignments, so force a head reallocation and shrink headroom to MAX_TCP_HEADER. Dont care about misaligments on x86 and PPC (or other arches setting NET_IP_ALIGN to 0) This patch introduces __pskb_copy() which can specify the headroom of new head, and pskb_copy() becomes a wrapper on top of __pskb_copy() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cec0657d0d32..12e6fed73f8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -568,8 +568,9 @@ extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); -extern struct sk_buff *pskb_copy(struct sk_buff *skb, - gfp_t gfp_mask); +extern struct sk_buff *__pskb_copy(struct sk_buff *skb, + int headroom, gfp_t gfp_mask); + extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); @@ -1799,6 +1800,12 @@ static inline dma_addr_t skb_frag_dma_map(struct device *dev, frag->page_offset + offset, size, dir); } +static inline struct sk_buff *pskb_copy(struct sk_buff *skb, + gfp_t gfp_mask) +{ + return __pskb_copy(skb, skb_headroom(skb), gfp_mask); +} + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check -- cgit v1.2.3 From 8f97339d3feb662037b86a925e692017c0b32323 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Jul 2011 22:48:10 +0100 Subject: netfilter: add ipv4 reverse path filter match This tries to do the same thing as fib_validate_source(), but differs in several aspects. The most important difference is that the reverse path filter built into fib_validate_source uses the oif as iif when performing the reverse lookup. We do not do this, as the oif is not yet known by the time the PREROUTING hook is invoked. We can't wait until FORWARD chain because by the time FORWARD is invoked ipv4 forward path may have already sent icmp messages is response to to-be-discarded-via-rpfilter packets. To avoid the such an additional lookup in PREROUTING, Patrick McHardy suggested to attach the path information directly in the match (i.e., just do what the standard ipv4 path does a bit earlier in PREROUTING). This works, but it also has a few caveats. Most importantly, when using marks in PREROUTING to re-route traffic based on the nfmark, -m rpfilter would have to be used after the nfmark has been set; otherwise the nfmark would have no effect (because the route is already attached). Another problem would be interaction with -j TPROXY, as this target sets an nfmark and uses ACCEPT instead of continue, i.e. such a version of -m rpfilter cannot be used for the initial to-be-intercepted packets. In case in turns out that the oif is required, we can add Patricks suggestion with a new match option (e.g. --rpf-use-oif) to keep ruleset compatibility. Another difference to current builtin ipv4 rpfilter is that packets subject to ipsec transformation are not automatically excluded. If you want this, simply combine -m rpfilter with the policy match. Packets arriving on loopback interfaces always match. Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_rpfilter.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/netfilter/xt_rpfilter.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_rpfilter.h b/include/linux/netfilter/xt_rpfilter.h new file mode 100644 index 000000000000..8358d4f71952 --- /dev/null +++ b/include/linux/netfilter/xt_rpfilter.h @@ -0,0 +1,23 @@ +#ifndef _XT_RPATH_H +#define _XT_RPATH_H + +#include + +enum { + XT_RPFILTER_LOOSE = 1 << 0, + XT_RPFILTER_VALID_MARK = 1 << 1, + XT_RPFILTER_ACCEPT_LOCAL = 1 << 2, + XT_RPFILTER_INVERT = 1 << 3, +#ifdef __KERNEL__ + XT_RPFILTER_OPTION_MASK = XT_RPFILTER_LOOSE | + XT_RPFILTER_VALID_MARK | + XT_RPFILTER_ACCEPT_LOCAL | + XT_RPFILTER_INVERT, +#endif +}; + +struct xt_rpfilter_info { + __u8 flags; +}; + +#endif -- cgit v1.2.3 From 10c6db110d0eb4466b59812c49088ab56218fc2e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 26 Nov 2011 02:47:31 +0100 Subject: perf: Fix loss of notification with multi-event When you do: $ perf record -e cycles,cycles,cycles noploop 10 You expect about 10,000 samples for each event, i.e., 10s at 1000samples/sec. However, this is not what's happening. You get much fewer samples, maybe 3700 samples/event: $ perf report -D | tail -15 Aggregated stats: TOTAL events: 10998 MMAP events: 66 COMM events: 2 SAMPLE events: 10930 cycles stats: TOTAL events: 3644 SAMPLE events: 3644 cycles stats: TOTAL events: 3642 SAMPLE events: 3642 cycles stats: TOTAL events: 3644 SAMPLE events: 3644 On a Intel Nehalem or even AMD64, there are 4 counters capable of measuring cycles, so there is plenty of space to measure those events without multiplexing (even with the NMI watchdog active). And even with multiplexing, we'd expect roughly the same number of samples per event. The root of the problem was that when the event that caused the buffer to become full was not the first event passed on the cmdline, the user notification would get lost. The notification was sent to the file descriptor of the overflowed event but the perf tool was not polling on it. The perf tool aggregates all samples into a single buffer, i.e., the buffer of the first event. Consequently, it assumes notifications for any event will come via that descriptor. The seemingly straight forward solution of moving the waitq into the ringbuffer object doesn't work because of life-time issues. One could perf_event_set_output() on a fd that you're also blocking on and cause the old rb object to be freed while its waitq would still be referenced by the blocked thread -> FAIL. Therefore link all events to the ringbuffer and broadcast the wakeup from the ringbuffer object to all possible events that could be waited upon. This is rather ugly, and we're open to better solutions but it works for now. Reported-by: Stephane Eranian Finished-by: Stephane Eranian Reviewed-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1e9ebe5e0091..b1f89122bf6a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -822,6 +822,7 @@ struct perf_event { int mmap_locked; struct user_struct *mmap_user; struct ring_buffer *rb; + struct list_head rb_entry; /* poll related */ wait_queue_head_t waitq; -- cgit v1.2.3 From bf315173359b2f3b8b8ccca4264815e91f30be12 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 3 Dec 2011 17:06:20 +0000 Subject: regmap: Allow drivers to reinitialise the register cache at runtime Sometimes the register map information may change in ways that drivers can discover at runtime. For example, new revisions of a device may add new registers. Support runtime discovery by drivers by allowing the register cache to be reinitialised with a new function regmap_reinit_cache() which discards the existing cache and creates a new one. Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index bebda1481f23..86923a98a766 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -129,6 +129,8 @@ struct regmap *regmap_init_spi(struct spi_device *dev, const struct regmap_config *config); void regmap_exit(struct regmap *map); +int regmap_reinit_cache(struct regmap *map, + const struct regmap_config *config); int regmap_write(struct regmap *map, unsigned int reg, unsigned int val); int regmap_raw_write(struct regmap *map, unsigned int reg, const void *val, size_t val_len); -- cgit v1.2.3 From 209a600623cf13a8168b2f6b83643db7825abb9a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Dec 2011 16:10:15 +0000 Subject: regmap: Add irq_base accessor to regmap_irq Allows devices to discover their own interrupt without having to remember it themselves. Signed-off-by: Mark Brown --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index bd54cecdfdf8..e7e8953e8c2a 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -190,5 +190,6 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); +int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data); #endif -- cgit v1.2.3 From f62ef5f3e9cff065aa845e2b7f487e1810b8e57e Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 2 Dec 2011 08:21:43 +0100 Subject: x86, amd: Fix up numa_node information for AMD CPU family 15h model 0-0fh northbridge functions I've received complaints that the numa_node attribute for family 15h model 00-0fh (e.g. Interlagos) northbridge functions shows -1 instead of the proper node ID. Correct this with attached quirks (similar to quirks for other AMD CPU families used in multi-socket systems). Signed-off-by: Andreas Herrmann Cc: Frank Arnold Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20111202072143.GA31916@alberich.amd.com Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 172ba70306d1..2aaee0ca9da8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -517,8 +517,12 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 +#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 +#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603 #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604 +#define PCI_DEVICE_ID_AMD_15H_NB_F5 0x1605 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 27b14b56af081ec7edeefb3a38b2c9577cc5ef48 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 1 Nov 2011 09:09:35 +0800 Subject: tracing: Restore system filter behavior Though not all events have field 'prev_pid', it was allowed to do this: # echo 'prev_pid == 100' > events/sched/filter but commit 75b8e98263fdb0bfbdeba60d4db463259f1fe8a2 (tracing/filter: Swap entire filter of events) broke it without any reason. Link: http://lkml.kernel.org/r/4EAF46CF.8040408@cn.fujitsu.com Signed-off-by: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 96efa6794ea5..c3da42dd22ba 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -172,6 +172,7 @@ enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, + TRACE_EVENT_FL_NO_SET_FILTER_BIT, }; enum { @@ -179,6 +180,7 @@ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), + TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), }; struct ftrace_event_call { -- cgit v1.2.3 From 63afe12f4be3b08597ae41ce7c0837bfc106b0ac Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Sun, 4 Dec 2011 11:22:52 +0000 Subject: if_ether.h: Add IEEE 802.1 Local Experimental Ethertype 1. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add EthType 0x88b5. This Ethertype value is available for public use for prototype and vendor-specific protocol development,as defined in Amendment 802a to IEEE Std 802. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index e473003e4bda..56d907a2c804 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -79,6 +79,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ +#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ -- cgit v1.2.3 From 0f5a2601284237e2ba089389fd75d67f77626cef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Nov 2011 14:38:16 +0100 Subject: perf: Avoid a useless pmu_disable() in the perf-tick Gleb writes: > Currently pmu is disabled and re-enabled on each timer interrupt even > when no rotation or frequency adjustment is needed. On Intel CPU this > results in two writes into PERF_GLOBAL_CTRL MSR per tick. On bare metal > it does not cause significant slowdown, but when running perf in a virtual > machine it leads to 20% slowdown on my machine. Cure this by keeping a perf_event_context::nr_freq counter that counts the number of active events that require frequency adjustments and use this in a similar fashion to the already existing nr_events != nr_active test in perf_rotate_context(). By being able to exclude both rotation and frequency adjustments a-priory for the common case we can avoid the otherwise superfluous PMU disable. Suggested-by: Gleb Natapov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-515yhoatehd3gza7we9fapaa@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b1f89122bf6a..cb44c9e75660 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -890,6 +890,7 @@ struct perf_event_context { int nr_active; int is_active; int nr_stat; + int nr_freq; int rotate_disable; atomic_t refcount; struct task_struct *task; -- cgit v1.2.3 From 1e2ad28f80b4e155678259238f51edebc19e4014 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 18 Nov 2011 12:35:21 +0100 Subject: perf, x86: Implement event scheduler helper functions This patch introduces x86 perf scheduler code helper functions. We need this to later add more complex functionality to support overlapping counter constraints (next patch). The algorithm is modified so that the range of weight values is now generated from the constraints. There shouldn't be other functional changes. With the helper functions the scheduler is controlled. There are functions to initialize, traverse the event list, find unused counters etc. The scheduler keeps its own state. V3: * Added macro for_each_set_bit_cont(). * Changed functions interfaces of perf_sched_find_counter() and perf_sched_next_event() to use bool as return value. * Added some comments to make code better understandable. V4: * Fix broken event assignment if weight of the first event is not wmin (perf_sched_init()). Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1321616122-1533-2-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a3ef66a2a083..3c1063acb2ab 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w); #include #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_cont(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) static __inline__ int get_bitmask_order(unsigned int count) -- cgit v1.2.3 From b202952075f62603bea9bfb6ebc6b0420db11949 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 27 Nov 2011 17:59:09 +0200 Subject: perf, core: Rate limit perf_sched_events jump_label patching jump_lable patching is very expensive operation that involves pausing all cpus. The patching of perf_sched_events jump_label is easily controllable from userspace by unprivileged user. When te user runs a loop like this: "while true; do perf stat -e cycles true; done" ... the performance of my test application that just increments a counter for one second drops by 4%. This is on a 16 cpu box with my test application using only one of them. An impact on a real server doing real work will be worse. Performance of KVM PMU drops nearly 50% due to jump_lable for "perf record" since KVM PMU implementation creates and destroys perf event frequently. This patch introduces a way to rate limit jump_label patching and uses it to fix the above problem. I believe that as jump_label use will spread the problem will become more common and thus solving it in a generic code is appropriate. Also fixing it in the perf code would result in moving jump_label accounting logic to perf code with all the ifdefs in case of JUMP_LABEL=n kernel. With this patch all details are nicely hidden inside jump_label code. Signed-off-by: Gleb Natapov Acked-by: Jason Baron Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111127155909.GO2557@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 24 ++++++++++++++++++++++++ include/linux/perf_event.h | 6 +++--- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 388b0d425b50..a1e7f909c801 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -3,6 +3,7 @@ #include #include +#include #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) @@ -14,6 +15,12 @@ struct jump_label_key { #endif }; +struct jump_label_key_deferred { + struct jump_label_key key; + unsigned long timeout; + struct delayed_work work; +}; + # include # define HAVE_JUMP_LABEL #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ @@ -51,8 +58,11 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, extern int jump_label_text_reserved(void *start, void *end); extern void jump_label_inc(struct jump_label_key *key); extern void jump_label_dec(struct jump_label_key *key); +extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); extern bool jump_label_enabled(struct jump_label_key *key); extern void jump_label_apply_nops(struct module *mod); +extern void jump_label_rate_limit(struct jump_label_key_deferred *key, + unsigned long rl); #else /* !HAVE_JUMP_LABEL */ @@ -68,6 +78,10 @@ static __always_inline void jump_label_init(void) { } +struct jump_label_key_deferred { + struct jump_label_key key; +}; + static __always_inline bool static_branch(struct jump_label_key *key) { if (unlikely(atomic_read(&key->enabled))) @@ -85,6 +99,11 @@ static inline void jump_label_dec(struct jump_label_key *key) atomic_dec(&key->enabled); } +static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) +{ + jump_label_dec(&key->key); +} + static inline int jump_label_text_reserved(void *start, void *end) { return 0; @@ -102,6 +121,11 @@ static inline int jump_label_apply_nops(struct module *mod) { return 0; } + +static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, + unsigned long rl) +{ +} #endif /* HAVE_JUMP_LABEL */ #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cb44c9e75660..564769cdb473 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1064,12 +1064,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) } } -extern struct jump_label_key perf_sched_events; +extern struct jump_label_key_deferred perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { - if (static_branch(&perf_sched_events)) + if (static_branch(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); } @@ -1078,7 +1078,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - if (static_branch(&perf_sched_events)) + if (static_branch(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); } -- cgit v1.2.3 From 69e1e811dcc436a6b129dbef273ad9ec22d095ce Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 1 Dec 2011 17:07:33 -0800 Subject: sched, nohz: Track nr_busy_cpus in the sched_group_power Introduce nr_busy_cpus in the struct sched_group_power [Not in sched_group because sched groups are duplicated for the SD_OVERLAP scheduler domain] and for each cpu that enters and exits idle, this parameter will be updated in each scheduler group of the scheduler domain that this cpu belongs to. To avoid the frequent update of this state as the cpu enters and exits idle, the update of the stat during idle exit is delayed to the first timer tick that happens after the cpu becomes busy. This is done using NOHZ_IDLE flag in the struct rq's nohz_flags. Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20111202010832.555984323@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db17b7622ec..295666cb5b86 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); +extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void select_nohz_load_balancer(int stop_tick) { } +static inline void set_cpu_sd_state_idle(void); #endif /* @@ -901,6 +903,10 @@ struct sched_group_power { * single CPU. */ unsigned int power, power_orig; + /* + * Number of busy cpus in this group. + */ + atomic_t nr_busy_cpus; }; struct sched_group { -- cgit v1.2.3 From 3292beb340c76884427faa1f5d6085719477d889 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 28 Nov 2011 14:45:17 -0200 Subject: sched/accounting: Change cpustat fields to an array This patch changes fields in cpustat from a structure, to an u64 array. Math gets easier, and the code is more flexible. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki Cc: Linus Torvalds Cc: Andrew Morton Cc: Paul Tuner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1322498719-2255-2-git-send-email-glommer@parallels.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0cce2db580c3..2fbd9053c2df 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -15,21 +16,25 @@ * used by rstatd/perfmeter */ -struct cpu_usage_stat { - cputime64_t user; - cputime64_t nice; - cputime64_t system; - cputime64_t softirq; - cputime64_t irq; - cputime64_t idle; - cputime64_t iowait; - cputime64_t steal; - cputime64_t guest; - cputime64_t guest_nice; +enum cpu_usage_stat { + CPUTIME_USER, + CPUTIME_NICE, + CPUTIME_SYSTEM, + CPUTIME_SOFTIRQ, + CPUTIME_IRQ, + CPUTIME_IDLE, + CPUTIME_IOWAIT, + CPUTIME_STEAL, + CPUTIME_GUEST, + CPUTIME_GUEST_NICE, + NR_STATS, +}; + +struct kernel_cpustat { + u64 cpustat[NR_STATS]; }; struct kernel_stat { - struct cpu_usage_stat cpustat; #ifndef CONFIG_GENERIC_HARDIRQS unsigned int irqs[NR_IRQS]; #endif @@ -38,10 +43,13 @@ struct kernel_stat { }; DECLARE_PER_CPU(struct kernel_stat, kstat); +DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -#define kstat_cpu(cpu) per_cpu(kstat, cpu) /* Must have preemption disabled for this to be meaningful. */ -#define kstat_this_cpu __get_cpu_var(kstat) +#define kstat_this_cpu (&__get_cpu_var(kstat)) +#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_cpu(cpu) per_cpu(kstat, cpu) +#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) extern unsigned long long nr_context_switches(void); -- cgit v1.2.3 From 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Dec 2011 07:56:43 +0000 Subject: inet_diag: Partly rename inet_ to sock_ The ultimate goal is to get the sock_diag module, that works in family+protocol terms. Currently this is suitable to do on the inet_diag basis, so rename parts of the code. It will be moved to sock_diag.c later. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 8374d2967362..52e48959cfa1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -8,7 +8,7 @@ #define NETLINK_UNUSED 1 /* Unused number */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ #define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_INET_DIAG 4 /* INET socket monitoring */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ @@ -27,6 +27,8 @@ #define NETLINK_RDMA 20 #define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + #define MAX_LINKS 32 struct sockaddr_nl { -- cgit v1.2.3 From 8d34172dfdb762a306cdf58b547aa10d798622ec Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Dec 2011 07:57:06 +0000 Subject: sock_diag: Introduce new message type This type will run the family+protocol based socket dumping. Also prepare the stub function for it. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index abf5028db981..f7baaf637426 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -6,6 +6,7 @@ /* Just some random number */ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 +#define SOCK_DIAG_BY_FAMILY 20 #define INET_DIAG_GETSOCK_MAX 24 -- cgit v1.2.3 From d366477a52f1df29fa066ffb18e4e6101ee2ad04 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Dec 2011 07:58:03 +0000 Subject: sock_diag: Initial skeleton When receiving the SOCK_DIAG_BY_FAMILY message we have to find the handler for provided family and pass the nl message to it. This patch describes an infrastructure to work with such nandlers and implements stubs for AF_INET(6) ones. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/sock_diag.h (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h new file mode 100644 index 000000000000..ba4933b1213b --- /dev/null +++ b/include/linux/sock_diag.h @@ -0,0 +1,23 @@ +#ifndef __SOCK_DIAG_H__ +#define __SOCK_DIAG_H__ +struct sk_buff; +struct nlmsghdr; + +struct sock_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; +}; + +struct sock_diag_handler { + __u8 family; + int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +int sock_diag_register(struct sock_diag_handler *h); +void sock_diag_unregister(struct sock_diag_handler *h); + +void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); + +extern struct sock *sock_diag_nlsk; +#endif -- cgit v1.2.3 From 126fdc3249c9ced2a0d20f916858fec26a445f61 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Dec 2011 07:58:21 +0000 Subject: inet_diag: Introduce new inet_diag_req header This one coinsides with the sock_diag_req in the beginning and contains only used fields from its previous analogue. The existing code is patched to use the _compat version of it for now. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index f7baaf637426..defe8ff36df8 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -23,7 +23,7 @@ struct inet_diag_sockid { /* Request structure */ -struct inet_diag_req { +struct inet_diag_req_compat { __u8 idiag_family; /* Family of addresses. */ __u8 idiag_src_len; __u8 idiag_dst_len; @@ -35,6 +35,15 @@ struct inet_diag_req { __u32 idiag_dbs; /* Tables to dump (NI) */ }; +struct inet_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u8 idiag_ext; + __u8 pad; + __u32 idiag_states; + struct inet_diag_sockid id; +}; + enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, -- cgit v1.2.3 From ac99b862fb98a36929831791da31714f709c2aa8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2011 14:20:14 +0200 Subject: jump_label: Provide jump_label_key initializers Provide two initializers for jump_label_key that initialize it enabled or disabled. Also modify all jump_label code to allow for jump_labels to be initialized enabled. Signed-off-by: Peter Zijlstra Cc: Jason Baron Link: http://lkml.kernel.org/n/tip-p40e3yj21b68y03z1yv825e7@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a1e7f909c801..5ce8b140428f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -128,4 +128,7 @@ static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, } #endif /* HAVE_JUMP_LABEL */ +#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) +#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) + #endif /* _LINUX_JUMP_LABEL_H */ -- cgit v1.2.3 From fdaabd800bdd60652a448994eeb77442180db6c0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Dec 2011 12:47:55 +0100 Subject: sched: Fix compile error for UP,!NOHZ Commit 69e1e811 ("sched, nohz: Track nr_busy_cpus in the sched_group_power") messed up the static inline function definition. Signed-off-by: Peter Zijlstra Cc: Suresh Siddha Link: http://lkml.kernel.org/n/tip-abjah8ctq5qrjjtdiabe8lph@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 295666cb5b86..64527c499624 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -277,7 +277,7 @@ extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void select_nohz_load_balancer(int stop_tick) { } -static inline void set_cpu_sd_state_idle(void); +static inline void set_cpu_sd_state_idle(void) { } #endif /* -- cgit v1.2.3 From 54858ee5bf659f80a784303e41ee8898fd163f98 Mon Sep 17 00:00:00 2001 From: Alexander Simon Date: Wed, 30 Nov 2011 16:56:32 +0100 Subject: nl80211: Parse channel type attribute in an ibss join request Prepare cfg80211 for IBSS HT: * extend cfg80211 ibss struct with channel_type * Check if extension channel can be used * Export can_beacon_sec_chan for use in mac80211 (will be called from ibss.c later). Signed-off-by: Alexander Simon [siwu@hrz.tu-chemnitz.de: Updates] * fix cfg80211_can_beacon_ext_chan comment * remove implicit channel_type enum assumptions * remove radar channel flags check * add HT IBSS feature flag * reword commit message Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f51e3bf93a96..a18760684fc9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2785,9 +2785,11 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back * TX status to the socket error queue when requested with the * socket option. + * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, + NL80211_FEATURE_HT_IBSS = 1 << 1, }; /** -- cgit v1.2.3 From 3df6eaea76a9e1351b539541c0314129a0e4b10c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Dec 2011 10:39:40 +0100 Subject: mac80211: accept public action frames with mismatched BSSID Arik's patch "mac80211: allow action frames with unknown BSSID in GO mode" allowed any action frames in P2P mode to go through, but only to cooked monitor interfaces as the IEEE80211_RX_RA_MATCH was still cleared. As a result my no-monitor patches broke invitation responses. Instead of allowing any action frames in P2P GO mode to go through with a wrong BSSID like that patch did, allow all public action frames. They will never be processed by mac80211, but can be reported via nl80211 then. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 66cedf6eb5c2..17f2a768e2ad 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1694,6 +1694,23 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) return false; } +/** + * ieee80211_is_public_action - check if frame is a public action frame + * @hdr: the frame + * @len: length of the frame + */ +static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, + size_t len) +{ + struct ieee80211_mgmt *mgmt = (void *)hdr; + + if (len < IEEE80211_MIN_ACTION_SIZE) + return false; + if (!ieee80211_is_action(hdr->frame_control)) + return false; + return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; +} + /** * ieee80211_fhss_chan_to_freq - get channel frequency * @channel: the FHSS channel -- cgit v1.2.3 From d310310cbff18ec385c6ab4d58f33b100192a96a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Dec 2011 22:44:39 +0100 Subject: Freezer / sunrpc / NFS: don't allow TASK_KILLABLE sleeps to block the freezer Allow the freezer to skip wait_on_bit_killable sleeps in the sunrpc layer. This should allow suspend and hibernate events to proceed, even when there are RPC's pending on the wire. Also, wrap the TASK_KILLABLE sleeps in NFS layer in freezer_do_not_count and freezer_count calls. This allows the freezer to skip tasks that are sleeping while looping on EJUKEBOX or NFS4ERR_DELAY sorts of errors. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index c1ee2833655e..30f06c220467 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -104,6 +104,29 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } +/* + * These macros are intended to be used whenever you want allow a task that's + * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note + * that neither return any clear indication of whether a freeze event happened + * while in this function. + */ + +/* Like schedule(), but should not block the freezer. */ +#define freezable_schedule() \ +({ \ + freezer_do_not_count(); \ + schedule(); \ + freezer_count(); \ +}) + +/* Like schedule_timeout_killable(), but should not block the freezer. */ +#define freezable_schedule_timeout_killable(timeout) \ +({ \ + freezer_do_not_count(); \ + schedule_timeout_killable(timeout); \ + freezer_count(); \ +}) + /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -163,6 +186,11 @@ static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +#define freezable_schedule() schedule() + +#define freezable_schedule_timeout_killable(timeout) \ + schedule_timeout_killable(timeout) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) -- cgit v1.2.3 From e84b2c202771bbd538866207efcb1f7dbab8045b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Dec 2011 22:19:54 +0100 Subject: PM / Domains: Make it possible to assign names to generic PM domains Add a name member pointer to struct generic_pm_domain and use it in diagnostic messages regarding the domain power-off and power-on latencies. Update the ARM shmobile SH7372 code to assign names to the PM domains used by it. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fbb81bc5065a..fb809b904891 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -50,6 +50,7 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; + char *name; unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ -- cgit v1.2.3 From 02125a826459a6ad142f8d91c5b6357562f96615 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Dec 2011 08:43:34 -0500 Subject: fix apparmor dereferencing potentially freed dentry, sanitize __d_path() API __d_path() API is asking for trouble and in case of apparmor d_namespace_path() getting just that. The root cause is that when __d_path() misses the root it had been told to look for, it stores the location of the most remote ancestor in *root. Without grabbing references. Sure, at the moment of call it had been pinned down by what we have in *path. And if we raced with umount -l, we could have very well stopped at vfsmount/dentry that got freed as soon as prepend_path() dropped vfsmount_lock. It is safe to compare these pointers with pre-existing (and known to be still alive) vfsmount and dentry, as long as all we are asking is "is it the same address?". Dereferencing is not safe and apparmor ended up stepping into that. d_namespace_path() really wants to examine the place where we stopped, even if it's not connected to our namespace. As the result, it looked at ->d_sb->s_magic of a dentry that might've been already freed by that point. All other callers had been careful enough to avoid that, but it's really a bad interface - it invites that kind of trouble. The fix is fairly straightforward, even though it's bigger than I'd like: * prepend_path() root argument becomes const. * __d_path() is never called with NULL/NULL root. It was a kludge to start with. Instead, we have an explicit function - d_absolute_root(). Same as __d_path(), except that it doesn't get root passed and stops where it stops. apparmor and tomoyo are using it. * __d_path() returns NULL on path outside of root. The main caller is show_mountinfo() and that's precisely what we pass root for - to skip those outside chroot jail. Those who don't want that can (and do) use d_path(). * __d_path() root argument becomes const. Everyone agrees, I hope. * apparmor does *NOT* try to use __d_path() or any of its variants when it sees that path->mnt is an internal vfsmount. In that case it's definitely not mounted anywhere and dentry_path() is exactly what we want there. Handling of sysctl()-triggered weirdness is moved to that place. * if apparmor is asked to do pathname relative to chroot jail and __d_path() tells it we it's not in that jail, the sucker just calls d_absolute_path() instead. That's the other remaining caller of __d_path(), BTW. * seq_path_root() does _NOT_ return -ENAMETOOLONG (it's stupid anyway - the normal seq_file logics will take care of growing the buffer and redoing the call of ->show() just fine). However, if it gets path not reachable from root, it returns SEQ_SKIP. The only caller adjusted (i.e. stopped ignoring the return value as it used to do). Reviewed-by: John Johansen ACKed-by: John Johansen Signed-off-by: Al Viro Cc: stable@vger.kernel.org --- include/linux/dcache.h | 3 ++- include/linux/fs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4df926199369..ed9f74f6c519 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -339,7 +339,8 @@ extern int d_validate(struct dentry *, struct dentry *); */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); -extern char *__d_path(const struct path *path, struct path *root, char *, int); +extern char *__d_path(const struct path *, const struct path *, char *, int); +extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); extern char *dentry_path_raw(struct dentry *, char *, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index e3130220ce3e..019dc558df1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,6 +1942,7 @@ extern int fd_statfs(int, struct kstatfs *); extern int statfs_by_dentry(struct dentry *, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); +extern bool our_mnt(struct vfsmount *mnt); extern int current_umask(void); -- cgit v1.2.3 From e179816ce60033ce560b28e01bc555ed5116cbe9 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:55 +0000 Subject: powerpc/cpuidle: Enable cpuidle and directly call cpuidle_idle_call() for pSeries This patch enables cpuidle for pSeries and pSeries_idle is directly called from the idle loop. As a result of pSeries_idle, cpuidle driver registered with cpuidle subsystem comes into action. On failure of loading of the driver or cpuidle framework default idle is executed as part of the function. This patch also removes the routines pseries_shared_idle_sleep and pseries_dedicated_idle_sleep as they are now implemented as part of pseries_idle cpuidle driver. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- include/linux/cpuidle.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 7408af843b8a..23f81de51829 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -130,7 +130,6 @@ struct cpuidle_driver { #ifdef CONFIG_CPU_IDLE extern void disable_cpuidle(void); extern int cpuidle_idle_call(void); - extern int cpuidle_register_driver(struct cpuidle_driver *drv); struct cpuidle_driver *cpuidle_get_driver(void); extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); @@ -145,7 +144,6 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } - static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } -- cgit v1.2.3 From 581adcbe121872429de76ff9884762de71a76200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:06 -0800 Subject: memblock: Make memblock_{add|remove|free|reserve}() return int and update prototypes memblock_{add|remove|free|reserve}() return either 0 or -errno but had long as return type. Chage it to int. Also, drop 'extern' from all prototypes in memblock.h - they are unnecessary and used inconsistently (especially if mm.h is included in the picture). Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/memblock.h | 64 ++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ab89b417655c..2f8e28f859b3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -52,15 +52,15 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); -extern void memblock_init(void); -extern void memblock_analyze(void); -extern long memblock_add(phys_addr_t base, phys_addr_t size); -extern long memblock_remove(phys_addr_t base, phys_addr_t size); -extern long memblock_free(phys_addr_t base, phys_addr_t size); -extern long memblock_reserve(phys_addr_t base, phys_addr_t size); +void memblock_init(void); +void memblock_analyze(void); +int memblock_add(phys_addr_t base, phys_addr_t size); +int memblock_remove(phys_addr_t base, phys_addr_t size); +int memblock_free(phys_addr_t base, phys_addr_t size); +int memblock_reserve(phys_addr_t base, phys_addr_t size); -extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, - phys_addr_t *out_end, int *out_nid); +void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid); /** * for_each_free_mem_range - iterate through free memblock areas @@ -80,7 +80,7 @@ extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, __next_free_mem_range(&i, nid, p_start, p_end, p_nid)) #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); +int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); static inline void memblock_set_region_node(struct memblock_region *r, int nid) { @@ -105,37 +105,31 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t memblock_find_in_range_node(phys_addr_t start, - phys_addr_t end, - phys_addr_t size, - phys_addr_t align, int nid); -extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, - int nid); -extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, - int nid); +phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid); +phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); +phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern phys_addr_t memblock_alloc_base(phys_addr_t size, - phys_addr_t align, - phys_addr_t max_addr); -extern phys_addr_t __memblock_alloc_base(phys_addr_t size, - phys_addr_t align, - phys_addr_t max_addr); -extern phys_addr_t memblock_phys_mem_size(void); -extern phys_addr_t memblock_start_of_DRAM(void); -extern phys_addr_t memblock_end_of_DRAM(void); -extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); -extern int memblock_is_memory(phys_addr_t addr); -extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); -extern int memblock_is_reserved(phys_addr_t addr); -extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); - -extern void memblock_dump_all(void); +phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, + phys_addr_t max_addr); +phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, + phys_addr_t max_addr); +phys_addr_t memblock_phys_mem_size(void); +phys_addr_t memblock_start_of_DRAM(void); +phys_addr_t memblock_end_of_DRAM(void); +void memblock_enforce_memory_limit(phys_addr_t memory_limit); +int memblock_is_memory(phys_addr_t addr); +int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +int memblock_is_reserved(phys_addr_t addr); +int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); + +void memblock_dump_all(void); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -143,7 +137,7 @@ extern void memblock_dump_all(void); * accessible during boot * @limit: New limit value (physical address) */ -extern void memblock_set_current_limit(phys_addr_t limit); +void memblock_set_current_limit(phys_addr_t limit); /* -- cgit v1.2.3 From 4ff7b82f1e5fc65a7c9512b231b4ea533f28541a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:06 -0800 Subject: memblock: Add __memblock_dump_all() Add __memblock_dump_all() which dumps memblock configuration whether memblock_debug is enabled or not. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/memblock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2f8e28f859b3..1a3bee78590f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -129,7 +129,13 @@ int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); int memblock_is_reserved(phys_addr_t addr); int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -void memblock_dump_all(void); +extern void __memblock_dump_all(void); + +static inline void memblock_dump_all(void) +{ + if (memblock_debug) + __memblock_dump_all(); +} /** * memblock_set_current_limit - Set the current allocation limit to allow -- cgit v1.2.3 From c5a1cb284b791fcc3c70962331a682452afaf6cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill sentinel entries at the end of static region arrays memblock no longer depends on having one more entry at the end during addition making the sentinel entries at the end of region arrays not too useful. Remove the sentinels. This eases further updates. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/poison.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 79159de0e341..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -40,12 +40,6 @@ #define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ #define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ -#ifdef CONFIG_PHYS_ADDR_T_64BIT -#define MEMBLOCK_INACTIVE 0x3a84fb0144c9e71bULL -#else -#define MEMBLOCK_INACTIVE 0x44c9e71bUL -#endif - #define SLUB_RED_INACTIVE 0xbb #define SLUB_RED_ACTIVE 0xcc -- cgit v1.2.3 From fe091c208a40299fba40e62292a610fb91e44b4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill memblock_init() memblock_init() initializes arrays for regions and memblock itself; however, all these can be done with struct initializers and memblock_init() can be removed. This patch kills memblock_init() and initializes memblock with struct initializer. The only difference is that the first dummy entries don't have .nid set to MAX_NUMNODES initially. This doesn't cause any behavior difference. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1a3bee78590f..6ac91c5b2fd3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -52,7 +52,6 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); -void memblock_init(void); void memblock_analyze(void); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3 From 1440c4e2c918532f39131c3330fe2226e16be7b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: Track total size of regions automatically Total size of memory regions was calculated by memblock_analyze() requiring explicitly calling the function between operations which can change memory regions and possible users of total size, which is cumbersome and fragile. This patch makes each memblock_type track total size automatically with minor modifications to memblock manipulation functions and remove requirements on calling memblock_analyze(). [__]memblock_dump_all() now also dumps the total size of reserved regions. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6ac91c5b2fd3..5bb15005f0f7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -30,12 +30,12 @@ struct memblock_region { struct memblock_type { unsigned long cnt; /* number of regions */ unsigned long max; /* size of the allocated array */ + phys_addr_t total_size; /* size of all regions */ struct memblock_region *regions; }; struct memblock { phys_addr_t current_limit; - phys_addr_t memory_size; /* Updated by memblock_analyze() */ struct memblock_type memory; struct memblock_type reserved; }; -- cgit v1.2.3 From 1aadc0560f46530f8a0f11055285b876a8a31770 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: s/memblock_analyze()/memblock_allow_resize()/ and update users The only function of memblock_analyze() is now allowing resize of memblock region arrays. Rename it to memblock_allow_resize() and update its users. * The following users remain the same other than renaming. arm/mm/init.c::arm_memblock_init() microblaze/kernel/prom.c::early_init_devtree() powerpc/kernel/prom.c::early_init_devtree() openrisc/kernel/prom.c::early_init_devtree() sh/mm/init.c::paging_init() sparc/mm/init_64.c::paging_init() unicore32/mm/init.c::uc32_memblock_init() * In the following users, analyze was used to update total size which is no longer necessary. powerpc/kernel/machine_kexec.c::reserve_crashkernel() powerpc/kernel/prom.c::early_init_devtree() powerpc/mm/init_32.c::MMU_init() powerpc/mm/tlb_nohash.c::__early_init_mmu() powerpc/platforms/ps3/mm.c::ps3_mm_add_memory() powerpc/platforms/embedded6xx/wii.c::wii_memory_fixups() sh/kernel/machine_kexec.c::reserve_crashkernel() * x86/kernel/e820.c::memblock_x86_fill() was directly setting memblock_can_resize before populating memblock and calling analyze afterwards. Call memblock_allow_resize() before start populating. memblock_can_resize is now static inside memblock.c. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- include/linux/memblock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 5bb15005f0f7..c5b3bbc75897 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,7 +42,6 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) @@ -52,7 +51,7 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); -void memblock_analyze(void); +void memblock_allow_resize(void); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3 From 7fb0bc3f06fdc3a35e41bcea7a15e53d2515362f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: Implement memblock_add_node() Implement memblock_add_node() which can add a new memblock memory region with specific node ID. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c5b3bbc75897..c7b68f489d46 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -52,6 +52,7 @@ int memblock_free_reserved_regions(void); int memblock_reserve_reserved_regions(void); void memblock_allow_resize(void); +int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3 From 0ee332c1451869963626bf9cac88f165a90990e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:09 -0800 Subject: memblock: Kill early_node_map[] Now all ARCH_POPULATES_NODE_MAP archs select HAVE_MEBLOCK_NODE_MAP - there's no user of early_node_map[] left. Kill early_node_map[] and replace ARCH_POPULATES_NODE_MAP with HAVE_MEMBLOCK_NODE_MAP. Also, relocate for_each_mem_pfn_range() and helper from mm.h to memblock.h as page_alloc.c would no longer host an alternative implementation. This change is ultimately one to one mapping and shouldn't cause any observable difference; however, after the recent changes, there are some functions which now would fit memblock.c better than page_alloc.c and dependency on HAVE_MEMBLOCK_NODE_MAP instead of HAVE_MEMBLOCK doesn't make much sense on some of them. Further cleanups for functions inside HAVE_MEMBLOCK_NODE_MAP in mm.h would be nice. -v2: Fix compile bug introduced by mis-spelling CONFIG_HAVE_MEMBLOCK_NODE_MAP to CONFIG_MEMBLOCK_HAVE_NODE_MAP in mmzone.h. Reported by Stephen Rothwell. Signed-off-by: Tejun Heo Cc: Stephen Rothwell Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Tony Luck Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Chen Liqin Cc: Paul Mundt Cc: "David S. Miller" Cc: "H. Peter Anvin" --- include/linux/memblock.h | 23 +++++++++++++++++++--- include/linux/mm.h | 50 +++++++++++------------------------------------- include/linux/mmzone.h | 8 ++++---- 3 files changed, 35 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c7b68f489d46..cd7606b71e5a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -58,6 +58,26 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid); + +/** + * for_each_mem_pfn_range - early memory pfn range iterator + * @i: an integer used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to ulong for start pfn of the range, can be %NULL + * @p_end: ptr to ulong for end pfn of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over configured memory ranges. Available after early_node_map is + * populated. + */ +#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); @@ -101,9 +121,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -/* The numa aware allocator is only available if - * CONFIG_ARCH_POPULATES_NODE_MAP is set - */ phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b365aee8396..c6f49bea52a3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1252,43 +1252,34 @@ static inline void pgtable_page_dtor(struct page *page) extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* - * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its + * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its * zones, allocate the backing mem_map and account for memory holes in a more * architecture independent manner. This is a substitute for creating the * zone_sizes[] and zholes_size[] arrays and passing them to * free_area_init_node() * * An architecture is expected to register range of page frames backed by - * physical memory with add_active_range() before calling + * physical memory with memblock_add[_node]() before calling * free_area_init_nodes() passing in the PFN each zone ends at. At a basic * usage, an architecture is expected to do something like * * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() - * add_active_range(node_id, start_pfn, end_pfn) + * memblock_add_node(base, size, nid) * free_area_init_nodes(max_zone_pfns); * - * If the architecture guarantees that there are no holes in the ranges - * registered with add_active_range(), free_bootmem_active_regions() - * will call free_bootmem_node() for each registered physical page range. - * Similarly sparse_memory_present_with_active_regions() calls - * memory_present() for each range when SPARSEMEM is enabled. + * free_bootmem_with_active_regions() calls free_bootmem_node() for each + * registered physical page range. Similarly + * sparse_memory_present_with_active_regions() calls memory_present() for + * each range when SPARSEMEM is enabled. * * See mm/page_alloc.c for more information on each function exposed by - * CONFIG_ARCH_POPULATES_NODE_MAP + * CONFIG_HAVE_MEMBLOCK_NODE_MAP. */ extern void free_area_init_nodes(unsigned long *max_zone_pfn); -#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP -extern void add_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_all_active_ranges(void); -void sort_node_map(void); -#endif unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); @@ -1303,28 +1294,9 @@ int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); extern void sparse_memory_present_with_active_regions(int nid); -extern void __next_mem_pfn_range(int *idx, int nid, - unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid); - -/** - * for_each_mem_pfn_range - early memory pfn range iterator - * @i: an integer used as loop variable - * @nid: node selector, %MAX_NUMNODES for all nodes - * @p_start: ptr to ulong for start pfn of the range, can be %NULL - * @p_end: ptr to ulong for end pfn of the range, can be %NULL - * @p_nid: ptr to int for nid of the range, can be %NULL - * - * Walks over configured memory ranges. Available after early_node_map is - * populated. - */ -#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ - for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ - i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) - -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ +#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) static inline int __early_pfn_to_nid(unsigned long pfn) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 188cb2ffe8db..3ac040f19369 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -598,13 +598,13 @@ struct zonelist { #endif }; -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP struct node_active_region { unsigned long start_pfn; unsigned long end_pfn; int nid; }; -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #ifndef CONFIG_DISCONTIGMEM /* The array of struct pages - for discontigmem use pgdat->lmem_map */ @@ -720,7 +720,7 @@ extern int movable_zone; static inline int zone_movable_is_highmem(void) { -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP) +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE) return movable_zone == ZONE_HIGHMEM; #else return 0; @@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #endif #if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ - !defined(CONFIG_ARCH_POPULATES_NODE_MAP) + !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) static inline unsigned long early_pfn_to_nid(unsigned long pfn) { return 0; -- cgit v1.2.3 From 7bd0b0f0da3b1ec11cbcc798eb0ef747a1184077 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:09 -0800 Subject: memblock: Reimplement memblock allocation using reverse free area iterator Now that all early memory information is in memblock when enabled, we can implement reverse free area iterator and use it to implement NUMA aware allocator which is then wrapped for simpler variants instead of the confusing and inefficient mending of information in separate NUMA aware allocator. Implement for_each_free_mem_range_reverse(), use it to reimplement memblock_find_in_range_node() which in turn is used by all allocators. The visible allocator interface is inconsistent and can probably use some cleanup too. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- include/linux/memblock.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cd7606b71e5a..a6bb10235148 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,6 +46,8 @@ extern int memblock_debug; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); int memblock_free_reserved_regions(void); @@ -98,6 +100,26 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, i != (u64)ULLONG_MAX; \ __next_free_mem_range(&i, nid, p_start, p_end, p_nid)) +void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid); + +/** + * for_each_free_mem_range_reverse - rev-iterate through free memblock areas + * @i: u64 used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over free (memory && !reserved) areas of memblock in reverse + * order. Available as soon as memblock is initialized. + */ +#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \ + for (i = (u64)ULLONG_MAX, \ + __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid)) + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); @@ -121,8 +143,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -- cgit v1.2.3 From 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:17:51 +0100 Subject: PM / Freezer: Remove the "userspace only" constraint from freezer[_do_not]_count() At present, the functions freezer_count() and freezer_do_not_count() impose the restriction that they are effective only for userspace processes. However, now, these functions have found more utility than originally intended by the commit which introduced it: ba96a0c8 (freezer: fix vfork problem). And moreover, even the vfork issue actually does not need the above restriction in these functions. So, modify these functions to make them work even for kernel threads, so that they can be used at other places in the kernel, where the userspace restriction doesn't apply. Suggested-by: Oleg Nesterov Suggested-by: Tejun Heo Acked-by: Tejun Heo Reviewed-by: Oleg Nesterov Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 30f06c220467..7bcfe73d999b 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -67,33 +67,27 @@ static inline bool cgroup_freezing(struct task_struct *task) * appropriately in case the child has exited before the freezing of tasks is * complete. However, we don't want kernel threads to be frozen in unexpected * places, so we allow them to block freeze_processes() instead or to set - * PF_NOFREEZE if needed and PF_FREEZER_SKIP is only set for userland vfork - * parents. Fortunately, in the ____call_usermodehelper() case the parent won't - * really block freeze_processes(), since ____call_usermodehelper() (the child) - * does a little before exec/exit and it can't be frozen before waking up the - * parent. + * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the + * parent won't really block freeze_processes(), since ____call_usermodehelper() + * (the child) does a little before exec/exit and it can't be frozen before + * waking up the parent. */ -/* - * If the current task is a user space one, tell the freezer not to count it as - * freezable. - */ + +/* Tell the freezer not to count the current task as freezable. */ static inline void freezer_do_not_count(void) { - if (current->mm) - current->flags |= PF_FREEZER_SKIP; + current->flags |= PF_FREEZER_SKIP; } /* - * If the current task is a user space one, tell the freezer to count it as - * freezable again and try to freeze it. + * Tell the freezer to count the current task as freezable again and try to + * freeze it. */ static inline void freezer_count(void) { - if (current->mm) { - current->flags &= ~PF_FREEZER_SKIP; - try_to_freeze(); - } + current->flags &= ~PF_FREEZER_SKIP; + try_to_freeze(); } /* -- cgit v1.2.3 From 33e638b9070ba5e8812836e20390da6a6af13900 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:18:12 +0100 Subject: PM / Sleep: Use the freezer_count() functions in [un]lock_system_sleep() APIs Now that freezer_count() and freezer_do_not_count() don't have the restriction that they are effective only when called by userspace processes, use them in lock_system_sleep() and unlock_system_sleep() instead of open-coding their parts. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 1f7fff47cfac..906d62cfc15c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_VT @@ -380,16 +381,14 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { - /* simplified freezer_do_not_count() */ - current->flags |= PF_FREEZER_SKIP; + freezer_do_not_count(); mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); - /* simplified freezer_count() */ - current->flags &= ~PF_FREEZER_SKIP; + freezer_count(); } #endif -- cgit v1.2.3 From 9b6fc5dc879bc90f765db0e95eefcf123d0d06dd Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:24:38 +0100 Subject: PM / Sleep: Make [un]lock_system_sleep() generic The [un]lock_system_sleep() APIs were originally introduced to mutually exclude memory hotplug and hibernation. Directly using mutex_lock(&pm_mutex) to achieve mutual exclusion with suspend or hibernation code can lead to freezing failures. However, the APIs [un]lock_system_sleep() can be safely used to achieve the same, without causing freezing failures. So, since it would be beneficial to modify all the existing users of mutex_lock(&pm_mutex) (in all parts of the kernel), so that they use these safe APIs intead, make these APIs generic by removing the restriction that they work only when CONFIG_HIBERNATE_CALLBACKS is set. Moreover, that restriction didn't buy us anything anyway. Suggested-by: Tejun Heo Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 906d62cfc15c..95040cc33107 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -332,6 +332,8 @@ static inline bool system_entering_hibernation(void) { return false; } #define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ #define PM_POST_RESTORE 0x0006 /* Restore failed */ +extern struct mutex pm_mutex; + #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); @@ -352,6 +354,19 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count); extern bool pm_save_wakeup_count(unsigned int count); + +static inline void lock_system_sleep(void) +{ + freezer_do_not_count(); + mutex_lock(&pm_mutex); +} + +static inline void unlock_system_sleep(void) +{ + mutex_unlock(&pm_mutex); + freezer_count(); +} + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -367,30 +382,11 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } -#endif /* !CONFIG_PM_SLEEP */ - -extern struct mutex pm_mutex; -#ifndef CONFIG_HIBERNATE_CALLBACKS static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} -#else - -/* Let some subsystems like memory hotadd exclude hibernation */ - -static inline void lock_system_sleep(void) -{ - freezer_do_not_count(); - mutex_lock(&pm_mutex); -} - -static inline void unlock_system_sleep(void) -{ - mutex_unlock(&pm_mutex); - freezer_count(); -} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* -- cgit v1.2.3 From 7da82c06ded105bf601bfa0eafc92e84eb0ceeed Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Dec 2011 04:11:15 +0000 Subject: vlan: rename vlan_dev_info to vlan_dev_priv As this structure is priv, name it approprietely. Also for pointer to it use name "vlan". Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 070ac50c1d2d..31d7c976f063 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -386,7 +386,7 @@ struct vlan_ioctl_args { unsigned int skb_priority; unsigned int name_type; unsigned int bind_type; - unsigned int flag; /* Matches vlan_dev_info flags */ + unsigned int flag; /* Matches vlan_dev_priv flags */ } u; short vlan_qos; -- cgit v1.2.3 From 8e586137e6b63af1e881b328466ab5ffbe562510 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Dec 2011 19:52:37 -0500 Subject: net: make vlan ndo_vlan_rx_[add/kill]_vid return error value Let caller know the result of adding/removing vlan id to/from vlan filter. In some drivers I make those functions to just return 0. But in those where there is able to see if hw setup went correctly, return value is set appropriately. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eef257c76a40..f7bff9615728 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -792,11 +792,11 @@ struct netdev_tc_txq { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * - * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) * this function is called when a VLAN id is registered. * - * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) * this function is called when a VLAN id is unregistered. * @@ -911,9 +911,9 @@ struct net_device_ops { struct rtnl_link_stats64 *storage); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); - void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); - void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); -- cgit v1.2.3 From 87002b03baabd2b8f6281ab6411ed88d24958de1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Dec 2011 04:11:17 +0000 Subject: net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls This patch adds wrapper for ndo_vlan_rx_add_vid/ndo_vlan_rx_kill_vid functions. Check for NETIF_F_HW_VLAN_FILTER feature is done in this wrapper. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 31d7c976f063..71168a6f3347 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -109,6 +109,9 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern bool vlan_do_receive(struct sk_buff **skb, bool last_handler); extern struct sk_buff *vlan_untag(struct sk_buff *skb); +extern int vlan_vid_add(struct net_device *dev, unsigned short vid); +extern void vlan_vid_del(struct net_device *dev, unsigned short vid); + #else static inline struct net_device * __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id) @@ -139,6 +142,15 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb) { return skb; } + +static inline int vlan_vid_add(struct net_device *dev, unsigned short vid) +{ + return 0; +} + +static inline void vlan_vid_del(struct net_device *dev, unsigned short vid) +{ +} #endif /** -- cgit v1.2.3 From 5b9ea6e022e9ba0fe39cb349ac40361f78d5da5b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Dec 2011 04:11:18 +0000 Subject: vlan: introduce vid list with reference counting This allows to keep track of vids needed to be in rx vlan filters of devices even if they are used in bond/team etc. vlan_info as well as vlan_group previously was, is allocated when first vid is added and dealocated whan last vid is deleted. vlan_group definition is moved to private header. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 17 +---------------- include/linux/netdevice.h | 3 +-- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 71168a6f3347..0c9691305298 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -74,22 +74,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -/* if this changes, algorithm will have to be reworked because this - * depends on completely exhausting the VLAN identifier space. Thus - * it gives constant time look-up, but in many cases it wastes memory. - */ -#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 -#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) - -struct vlan_group { - struct net_device *real_dev; /* The ethernet(like) device - * the vlan is attached to. - */ - unsigned int nr_vlans; - struct hlist_node hlist; /* linked list */ - struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; - struct rcu_head rcu; -}; +struct vlan_info; static inline int is_vlan_dev(struct net_device *dev) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f7bff9615728..603730804da5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -55,7 +55,6 @@ #include -struct vlan_group; struct netpoll_info; struct phy_device; /* 802.11 specific */ @@ -1096,7 +1095,7 @@ struct net_device { /* Protocol specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) - struct vlan_group __rcu *vlgrp; /* VLAN group */ + struct vlan_info __rcu *vlan_info; /* VLAN info */ #endif #if IS_ENABLED(CONFIG_NET_DSA) struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ -- cgit v1.2.3 From 348a1443cc4303c72cf1ee3b26e476fec8e7b5fa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Dec 2011 04:11:19 +0000 Subject: vlan: introduce functions to do mass addition/deletion of vids by another device Introduce functions handy to copy vlan ids from one driver's list to another. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 0c9691305298..13aff1e2183b 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -97,6 +97,10 @@ extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, unsigned short vid); extern void vlan_vid_del(struct net_device *dev, unsigned short vid); +extern int vlan_vids_add_by_dev(struct net_device *dev, + const struct net_device *by_dev); +extern void vlan_vids_del_by_dev(struct net_device *dev, + const struct net_device *by_dev); #else static inline struct net_device * __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id) @@ -136,6 +140,17 @@ static inline int vlan_vid_add(struct net_device *dev, unsigned short vid) static inline void vlan_vid_del(struct net_device *dev, unsigned short vid) { } + +static inline int vlan_vids_add_by_dev(struct net_device *dev, + const struct net_device *by_dev) +{ + return 0; +} + +static inline void vlan_vids_del_by_dev(struct net_device *dev, + const struct net_device *by_dev) +{ +} #endif /** -- cgit v1.2.3 From 8af2a218de38f51ea4b4fa48cac1273319ae260c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Dec 2011 06:06:03 +0000 Subject: sch_red: Adaptative RED AQM Adaptative RED AQM for linux, based on paper from Sally FLoyd, Ramakrishna Gummadi, and Scott Shenker, August 2001 : http://icir.org/floyd/papers/adaptiveRed.pdf Goal of Adaptative RED is to make max_p a dynamic value between 1% and 50% to reach the target average queue : (max_th - min_th) / 2 Every 500 ms: if (avg > target and max_p <= 0.5) increase max_p : max_p += alpha; else if (avg < target and max_p >= 0.01) decrease max_p : max_p *= beta; target :[min_th + 0.4*(min_th - max_th), min_th + 0.6*(min_th - max_th)]. alpha : min(0.01, max_p / 4) beta : 0.9 max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa) Changes against our RED implementation are : max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32 fixed point number, to allow full range described in Adatative paper. To deliver a random number, we now use a reciprocal divide (thats really a multiply), but this operation is done once per marked/droped packet when in RED_BETWEEN_TRESH window, so added cost (compared to previous AND operation) is near zero. dump operation gives current max_p value in a new TCA_RED_MAX_P attribute. Example on a 10Mbit link : tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \ limit 400000 min 30000 max 90000 avpkt 1000 \ burst 55 ecn adaptative bandwidth 10Mbit # tc -s -d qdisc show dev eth3 ... qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn adaptative ewma 5 max_p=0.113335 Scell_log 15 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0) rate 9749Kbit 831pps backlog 72056b 16p requeues 0 marked 1357 early 35 pdrop 0 other 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index fb556dc594d3..e41e0d4de24b 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -181,6 +181,7 @@ enum { TCA_RED_UNSPEC, TCA_RED_PARMS, TCA_RED_STAB, + TCA_RED_MAX_P, __TCA_RED_MAX, }; @@ -194,8 +195,9 @@ struct tc_red_qopt { unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ unsigned char Scell_log; /* cell size for idle damping */ unsigned char flags; -#define TC_RED_ECN 1 -#define TC_RED_HARDDROP 2 +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 }; struct tc_red_xstats { -- cgit v1.2.3 From 83aeeada7c69f35e5100b27ec354335597a7a488 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 8 Dec 2011 14:33:54 -0800 Subject: vmscan: use atomic-long for shrinker batching Use atomic-long operations instead of looping around cmpxchg(). [akpm@linux-foundation.org: massage atomic.h inclusions] Signed-off-by: Konstantin Khlebnikov Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/mm.h | 1 + include/linux/shrinker.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 019dc558df1a..e0bc4ffb8e7f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -393,8 +393,8 @@ struct inodes_stat_t { #include #include #include -#include #include +#include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 3dc3a8c2c485..4baadd18f4ad 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index a83833a1f7a2..07ceb97d53fa 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -35,7 +35,7 @@ struct shrinker { /* These are for internal use */ struct list_head list; - long nr; /* objs pending delete */ + atomic_long_t nr_in_batch; /* objs pending delete */ }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ extern void register_shrinker(struct shrinker *); -- cgit v1.2.3 From a73ed26bbae7327370c5bd298f07de78df9e3466 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Dec 2011 02:46:45 +0000 Subject: sch_red: generalize accurate MAX_P support to RED/GRED/CHOKE Now RED uses a Q0.32 number to store max_p (max probability), allow RED/GRED/CHOKE to use/report full resolution at config/dump time. Old tc binaries are non aware of new attributes, and still set/get Plog. New tc binary set/get both Plog and max_p for backward compatibility, they display "probability value" if they get max_p from new kernels. # tc -d qdisc show dev ... ... qdisc red 10: parent 1:1 limit 360Kb min 30Kb max 90Kb ecn ewma 5 probability 0.09 Scell_log 15 Make sure we avoid potential divides by 0 in reciprocal_value(), if (max_th - min_th) is big. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e41e0d4de24b..8786ea741f52 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -216,6 +216,7 @@ enum { TCA_GRED_PARMS, TCA_GRED_STAB, TCA_GRED_DPS, + TCA_GRED_MAX_P, __TCA_GRED_MAX, }; @@ -255,6 +256,7 @@ enum { TCA_CHOKE_UNSPEC, TCA_CHOKE_PARMS, TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, __TCA_CHOKE_MAX, }; -- cgit v1.2.3 From 7b35eadd7eee2e0b42421ce3efbc30f1c3c745e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 9 Dec 2011 06:21:16 +0000 Subject: inet_diag: Remove indirect sizeof from inet diag handlers There's an info_size value stored on inet_diag_handler, but for existing code this value is effectively constant, so just use sizeof(struct tcp_info) where required. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index defe8ff36df8..851feff0747f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -141,7 +141,6 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); - __u16 idiag_info_size; __u16 idiag_type; }; -- cgit v1.2.3 From b005ab4ef8805dc4604848c9d2ccca9d71f8fc46 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 9 Dec 2011 06:21:53 +0000 Subject: inet_diag: Export inet diag cookie checking routine The netlink diag susbsys stores sk address bits in the nl message as a "cookie" and uses one when dumps details about particular socket. The same will be required for udp diag module, so introduce a heler in inet_diag module Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 851feff0747f..503674738368 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -144,6 +144,8 @@ struct inet_diag_handler { __u16 idiag_type; }; +int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req); + extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 8d07d1518a074a08b90be02eee5ee15e60ac9779 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 9 Dec 2011 06:22:44 +0000 Subject: inet_diag: Introduce the byte-code run on an inet socket The upcoming UDP module will require exactly this ability, so just move the existing code to provide one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 503674738368..907c899bd41b 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -135,6 +135,7 @@ struct tcpvegas_info { #ifdef __KERNEL__ struct sock; struct inet_hashinfo; +struct nlattr; struct inet_diag_handler { struct inet_hashinfo *idiag_hashinfo; @@ -144,6 +145,7 @@ struct inet_diag_handler { __u16 idiag_type; }; +int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req); extern int inet_diag_register(const struct inet_diag_handler *handler); -- cgit v1.2.3 From 3c4d05c8056724aff3abc20650807dd828fded54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 9 Dec 2011 06:23:00 +0000 Subject: inet_diag: Introduce the inet socket dumping routine The existing inet_csk_diag_fill dumps the inet connection sock info into the netlink inet_diag_message. Prepare this routine to be able to dump only the inet_sock part of a socket if the icsk part is missing. This will be used by UDP diag module when dumping UDP sockets. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 907c899bd41b..eaf5865c9e8a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -136,6 +136,8 @@ struct tcpvegas_info { struct sock; struct inet_hashinfo; struct nlattr; +struct nlmsghdr; +struct sk_buff; struct inet_diag_handler { struct inet_hashinfo *idiag_hashinfo; @@ -145,6 +147,11 @@ struct inet_diag_handler { __u16 idiag_type; }; +struct inet_connection_sock; +int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req); -- cgit v1.2.3 From 1942c518ca017f376b267a7c5e78c15d37202442 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 9 Dec 2011 06:23:18 +0000 Subject: inet_diag: Generalize inet_diag dump and get_exact calls Introduce two callbacks in inet_diag_handler -- one for dumping all sockets (with filters) and the other one for dumping a single sk. Replace direct calls to icsk handlers with indirect calls to callbacks provided by handlers. Make existing TCP and DCCP handlers use provided helpers for icsk-s. The UDP diag module will provide its own. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index eaf5865c9e8a..78972a149dff 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -138,9 +138,18 @@ struct inet_hashinfo; struct nlattr; struct nlmsghdr; struct sk_buff; +struct netlink_callback; struct inet_diag_handler { - struct inet_hashinfo *idiag_hashinfo; + void (*dump)(struct sk_buff *skb, + struct netlink_callback *cb, + struct inet_diag_req *r, + struct nlattr *bc); + + int (*dump_one)(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct inet_diag_req *req); + void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); @@ -152,6 +161,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh); +void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, + struct netlink_callback *cb, struct inet_diag_req *r, + struct nlattr *bc); +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req); -- cgit v1.2.3 From 925b44a273aa8c4c23c006c1228aacd538eead09 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 8 Dec 2011 23:27:28 +0100 Subject: PM / Domains: Provide an always on power domain governor Since systems are likely to have power domains that can't be turned off for various reasons at least temporarily while implementing power domain support provide a default governor which will always refuse to power off the domain, saving platforms having to implement their own. Since the code is so tiny don't bother with a Kconfig symbol for it. Signed-off-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fb809b904891..a03a0ad998b8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -140,6 +140,7 @@ extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern bool default_stop_ok(struct device *dev); +extern struct dev_power_governor pm_domain_always_on_gov; #else static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) @@ -193,6 +194,7 @@ static inline bool default_stop_ok(struct device *dev) { return false; } +#define pm_domain_always_on_gov NULL #endif static inline int pm_genpd_remove_callbacks(struct device *dev) -- cgit v1.2.3 From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 9 Dec 2011 23:36:36 +0100 Subject: PM / Sleep: Fix freezer failures due to racy usermodehelper_is_disabled() Commit a144c6a (PM: Print a warning if firmware is requested when tasks are frozen) introduced usermodehelper_is_disabled() to warn and exit immediately if firmware is requested when usermodehelpers are disabled. However, it is racy. Consider the following scenario, currently used in drivers/base/firmware_class.c: ... if (usermodehelper_is_disabled()) goto out; /* Do actual work */ ... out: return err; Nothing prevents someone from disabling usermodehelpers just after the check in the 'if' condition, which means that it is quite possible to try doing the "actual work" with usermodehelpers disabled, leading to undesirable consequences. In particular, this race condition in _request_firmware() causes task freezing failures whenever suspend/hibernation is in progress because, it wrongly waits to get the firmware/microcode image from userspace when actually the usermodehelpers are disabled or userspace has been frozen. Some of the example scenarios that cause freezing failures due to this race are those that depend on userspace via request_firmware(), such as x86 microcode module initialization and microcode image reload. Previous discussions about this issue can be found at: http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591 This patch adds proper synchronization to fix this issue. It is to be noted that this patchset fixes the freezing failures but doesn't remove the warnings. IOW, it does not attempt to add explicit synchronization to x86 microcode driver to avoid requesting microcode image at inopportune moments. Because, the warnings were introduced to highlight such cases, in the first place. And we need not silence the warnings, since we take care of the *real* problem (freezing failure) and hence, after that, the warnings are pretty harmless anyway. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/kmod.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index b16f65390734..722f477c4ef7 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -117,5 +117,7 @@ extern void usermodehelper_init(void); extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); +extern void read_lock_usermodehelper(void); +extern void read_unlock_usermodehelper(void); #endif /* __LINUX_KMOD_H__ */ -- cgit v1.2.3 From 5a3072be6ce00b10565c78da05ad78df41310045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 8 Dec 2011 22:53:29 +0100 Subject: drivers_base: make argument to platform_device_register_full const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit platform_device_register_full doesn't modify *pdevinfo so it can be marked as const without further adaptions. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 165a8d175370..5622fa24e97b 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -63,7 +63,7 @@ struct platform_device_info { u64 dma_mask; }; extern struct platform_device *platform_device_register_full( - struct platform_device_info *pdevinfo); + const struct platform_device_info *pdevinfo); /** * platform_device_register_resndata - add a platform-level device with -- cgit v1.2.3 From 6de5fc9cf7de334912de4cfd2d06eb2d744d2afe Mon Sep 17 00:00:00 2001 From: Stefan Nilsson XK Date: Thu, 3 Nov 2011 09:44:12 +0100 Subject: mmc: core: Add quirk for long data read time Adds a quirk that sets the data read timeout to a fixed value instead of relying on the information in the CSD. The timeout value chosen is 300ms since that has proven enough for the problematic cards found, but could be increased if other cards require this. This patch also enables this quirk for certain Micron cards known to have this problem. Signed-off-by: Stefan Nilsson XK Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 415f2db414e1..c8ef9bc54d50 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -218,6 +218,7 @@ struct mmc_card { #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ #define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */ +#define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */ /* byte mode */ unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */ #define MMC_NO_POWER_NOTIFICATION 0 @@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512; } +static inline int mmc_card_long_read_time(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_LONG_READ_TIME; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Sep 2011 12:10:22 -0700 Subject: rcu: Track idleness independent of idle tasks Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required. This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop that RCU may be used. Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This patch therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which avoids the interrupt entry/exit error from accumulating. Overflow is avoided by the 64-bitness of the ->dyntick_nesting counter. This commit also adds warnings if a non-idle task asks RCU to enter idle state (and these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246). In addition, validation of ->dynticks and ->dynticks_nesting is added. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/hardirq.h | 21 --------------------- include/linux/rcupdate.h | 21 ++++----------------- include/linux/tick.h | 11 +++++++++-- 3 files changed, 13 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f743883f769e..bb7f30971858 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) extern void account_system_vtime(struct task_struct *tsk); #endif -#if defined(CONFIG_NO_HZ) #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -static inline void rcu_irq_enter(void) -{ - rcu_exit_nohz(); -} - -static inline void rcu_irq_exit(void) -{ - rcu_enter_nohz(); -} static inline void rcu_nmi_enter(void) { @@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void) } #else -extern void rcu_irq_enter(void); -extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); extern void rcu_nmi_exit(void); #endif -#else -# define rcu_irq_enter() do { } while (0) -# define rcu_irq_exit() do { } while (0) -# define rcu_nmi_enter() do { } while (0) -# define rcu_nmi_exit() do { } while (0) -#endif /* #if defined(CONFIG_NO_HZ) */ /* * It is safe to do non-atomic ops on ->hardirq_context, diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2cf4226ade7e..cd1ad4b04c6d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -177,23 +177,10 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_check_callbacks(int cpu, int user); struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) -{ -} - -static inline void rcu_exit_nohz(void) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ */ +extern void rcu_idle_enter(void); +extern void rcu_idle_exit(void); +extern void rcu_irq_enter(void); +extern void rcu_irq_exit(void); /* * Infrastructure to implement the synchronize_() primitives in diff --git a/include/linux/tick.h b/include/linux/tick.h index b232ccc0ee29..ca40838fdfb7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -127,8 +127,15 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else -static inline void tick_nohz_stop_sched_tick(int inidle) { } -static inline void tick_nohz_restart_sched_tick(void) { } +static inline void tick_nohz_stop_sched_tick(int inidle) +{ + if (inidle) + rcu_idle_enter(); +} +static inline void tick_nohz_restart_sched_tick(void) +{ + rcu_idle_exit(); +} static inline ktime_t tick_nohz_get_sleep_length(void) { ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; -- cgit v1.2.3 From 91afaf300269aa99a4d646969b3258b74294ac4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Oct 2011 07:44:32 -0700 Subject: rcu: Add failure tracing to rcutorture Trace the rcutorture RCU accesses and dump the trace buffer when the first failure is detected. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index cd1ad4b04c6d..8d315b013e37 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) extern void rcutorture_record_test_transition(void); extern void rcutorture_record_progress(unsigned long vernum); +extern void do_trace_rcu_torture_read(char *rcutorturename, + struct rcu_head *rhp); #else static inline void rcutorture_record_test_transition(void) { @@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void) static inline void rcutorture_record_progress(unsigned long vernum) { } +#ifdef CONFIG_RCU_TRACE +extern void do_trace_rcu_torture_read(char *rcutorturename, + struct rcu_head *rhp); +#else +#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#endif #endif #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) -- cgit v1.2.3 From e6b80a3b0994ea6c3d876d72464f2debbfcfeb05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 16:25:18 -0700 Subject: rcu: Detect illegal rcu dereference in extended quiescent state Report that none of the rcu read lock maps are held while in an RCU extended quiescent state (the section between rcu_idle_enter() and rcu_idle_exit()). This helps detect any use of rcu_dereference() and friends from within the section in idle where RCU is not allowed. This way we can guarantee an extended quiescent window where the CPU can be put in dyntick idle mode or can simply aoid to be part of any global grace period completion while in the idle loop. Uses of RCU from such mode are totally ignored by RCU, hence the importance of these checks. Signed-off-by: Frederic Weisbecker Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8d315b013e37..bf91fcfe181c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -228,6 +228,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) #ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_PROVE_RCU +extern int rcu_is_cpu_idle(void); +#else /* !CONFIG_PROVE_RCU */ +static inline int rcu_is_cpu_idle(void) +{ + return 0; +} +#endif /* else !CONFIG_PROVE_RCU */ + extern struct lockdep_map rcu_lock_map; # define rcu_read_acquire() \ lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) @@ -262,6 +271,8 @@ static inline int rcu_read_lock_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; + if (rcu_is_cpu_idle()) + return 0; return lock_is_held(&rcu_lock_map); } @@ -285,6 +296,19 @@ extern int rcu_read_lock_bh_held(void); * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. + * + * Note that if the CPU is in the idle loop from an RCU point of + * view (ie: that we are in the section between rcu_idle_enter() and + * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU + * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs + * that are in such a section, considering these as in extended quiescent + * state, so such a CPU is effectively never in an RCU read-side critical + * section regardless of what RCU primitives it invokes. This state of + * affairs is required --- we need to keep an RCU-free window in idle + * where the CPU may possibly enter into low power mode. This way we can + * notice an extended quiescent state to other CPUs that started a grace + * period. Otherwise we would delay any grace period as long as we run in + * the idle task. */ #ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) @@ -293,6 +317,8 @@ static inline int rcu_read_lock_sched_held(void) if (!debug_lockdep_rcu_enabled()) return 1; + if (rcu_is_cpu_idle()) + return 0; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); -- cgit v1.2.3 From 00f49e5729af602deb559b0cf293a00b625e8636 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:02 +0200 Subject: rcu: Warn when rcu_read_lock() is used in extended quiescent state We are currently able to detect uses of rcu_dereference_check() inside extended quiescent states (such as the RCU-free window in idle). But rcu_read_lock() and friends can be used without rcu_dereference(), so that the earlier commit checking for use of rcu_dereference() and friends while in RCU idle mode miss some error conditions. This commit therefore adds extended quiescent state checking to rcu_read_lock() and friends. Uses of RCU from within RCU-idle mode are totally ignored by RCU, hence the importance of these checks. Signed-off-by: Frederic Weisbecker Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 52 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bf91fcfe181c..d201c155f70c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -237,21 +237,53 @@ static inline int rcu_is_cpu_idle(void) } #endif /* else !CONFIG_PROVE_RCU */ +static inline void rcu_lock_acquire(struct lockdep_map *map) +{ + WARN_ON_ONCE(rcu_is_cpu_idle()); + lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); +} + +static inline void rcu_lock_release(struct lockdep_map *map) +{ + WARN_ON_ONCE(rcu_is_cpu_idle()); + lock_release(map, 1, _THIS_IP_); +} + extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) + +static inline void rcu_read_acquire(void) +{ + rcu_lock_acquire(&rcu_lock_map); +} + +static inline void rcu_read_release(void) +{ + rcu_lock_release(&rcu_lock_map); +} extern struct lockdep_map rcu_bh_lock_map; -# define rcu_read_acquire_bh() \ - lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_) + +static inline void rcu_read_acquire_bh(void) +{ + rcu_lock_acquire(&rcu_bh_lock_map); +} + +static inline void rcu_read_release_bh(void) +{ + rcu_lock_release(&rcu_bh_lock_map); +} extern struct lockdep_map rcu_sched_lock_map; -# define rcu_read_acquire_sched() \ - lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release_sched() \ - lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) + +static inline void rcu_read_acquire_sched(void) +{ + rcu_lock_acquire(&rcu_sched_lock_map); +} + +static inline void rcu_read_release_sched(void) +{ + rcu_lock_release(&rcu_sched_lock_map); +} extern int debug_lockdep_rcu_enabled(void); -- cgit v1.2.3 From d8ab29f8be918b34a1ccd174569a53f0eb04b0a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Oct 2011 18:22:03 +0200 Subject: rcu: Remove one layer of abstraction from PROVE_RCU checking Simplify things a bit by substituting the definitions of the single-line rcu_read_acquire(), rcu_read_release(), rcu_read_acquire_bh(), rcu_read_release_bh(), rcu_read_acquire_sched(), and rcu_read_release_sched() functions at their call points. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 53 ++++++++---------------------------------------- 1 file changed, 8 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d201c155f70c..5dd6fd8b3203 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -250,41 +250,8 @@ static inline void rcu_lock_release(struct lockdep_map *map) } extern struct lockdep_map rcu_lock_map; - -static inline void rcu_read_acquire(void) -{ - rcu_lock_acquire(&rcu_lock_map); -} - -static inline void rcu_read_release(void) -{ - rcu_lock_release(&rcu_lock_map); -} - extern struct lockdep_map rcu_bh_lock_map; - -static inline void rcu_read_acquire_bh(void) -{ - rcu_lock_acquire(&rcu_bh_lock_map); -} - -static inline void rcu_read_release_bh(void) -{ - rcu_lock_release(&rcu_bh_lock_map); -} - extern struct lockdep_map rcu_sched_lock_map; - -static inline void rcu_read_acquire_sched(void) -{ - rcu_lock_acquire(&rcu_sched_lock_map); -} - -static inline void rcu_read_release_sched(void) -{ - rcu_lock_release(&rcu_sched_lock_map); -} - extern int debug_lockdep_rcu_enabled(void); /** @@ -364,12 +331,8 @@ static inline int rcu_read_lock_sched_held(void) #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -# define rcu_read_acquire_bh() do { } while (0) -# define rcu_read_release_bh() do { } while (0) -# define rcu_read_acquire_sched() do { } while (0) -# define rcu_read_release_sched() do { } while (0) +# define rcu_lock_acquire(a) do { } while (0) +# define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) { @@ -690,7 +653,7 @@ static inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); - rcu_read_acquire(); + rcu_lock_acquire(&rcu_lock_map); } /* @@ -710,7 +673,7 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { - rcu_read_release(); + rcu_lock_release(&rcu_lock_map); __release(RCU); __rcu_read_unlock(); } @@ -731,7 +694,7 @@ static inline void rcu_read_lock_bh(void) { local_bh_disable(); __acquire(RCU_BH); - rcu_read_acquire_bh(); + rcu_lock_acquire(&rcu_bh_lock_map); } /* @@ -741,7 +704,7 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_read_release_bh(); + rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); } @@ -758,7 +721,7 @@ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); - rcu_read_acquire_sched(); + rcu_lock_acquire(&rcu_sched_lock_map); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ @@ -775,7 +738,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_read_release_sched(); + rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); } -- cgit v1.2.3 From ff195cb69ba8d2af9b891be3a26db95fe1999d43 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Oct 2011 18:22:04 +0200 Subject: rcu: Warn when srcu_read_lock() is used in an extended quiescent state Catch SRCU up to the other variants of RCU by making PROVE_RCU complain if either srcu_read_lock() or srcu_read_lock_held() are used from within RCU-idle mode. Frederic reworked this to allow for the new versions of his patches that check for extended quiescent states. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 58971e891f48..4e0a3d41dae3 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -28,6 +28,7 @@ #define _LINUX_SRCU_H #include +#include struct srcu_struct_array { int c[2]; @@ -60,18 +61,10 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, __init_srcu_struct((sp), #sp, &__srcu_key); \ }) -# define srcu_read_acquire(sp) \ - lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define srcu_read_release(sp) \ - lock_release(&(sp)->dep_map, 1, _THIS_IP_) - #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ int init_srcu_struct(struct srcu_struct *sp); -# define srcu_read_acquire(sp) do { } while (0) -# define srcu_read_release(sp) do { } while (0) - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ void cleanup_srcu_struct(struct srcu_struct *sp); @@ -90,12 +83,29 @@ long srcu_batches_completed(struct srcu_struct *sp); * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an SRCU read-side critical section unless it can * prove otherwise. + * + * Note that if the CPU is in the idle loop from an RCU point of view + * (ie: that we are in the section between rcu_idle_enter() and + * rcu_idle_exit()) then srcu_read_lock_held() returns false even if + * the CPU did an srcu_read_lock(). The reason for this is that RCU + * ignores CPUs that are in such a section, considering these as in + * extended quiescent state, so such a CPU is effectively never in an + * RCU read-side critical section regardless of what RCU primitives it + * invokes. This state of affairs is required --- we need to keep an + * RCU-free window in idle where the CPU may possibly enter into low + * power mode. This way we can notice an extended quiescent state to + * other CPUs that started a grace period. Otherwise we would delay any + * grace period as long as we run in the idle task. */ static inline int srcu_read_lock_held(struct srcu_struct *sp) { - if (debug_locks) - return lock_is_held(&sp->dep_map); - return 1; + if (rcu_is_cpu_idle()) + return 0; + + if (!debug_locks) + return 1; + + return lock_is_held(&sp->dep_map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -150,7 +160,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { int retval = __srcu_read_lock(sp); - srcu_read_acquire(sp); + rcu_lock_acquire(&(sp)->dep_map); return retval; } @@ -164,7 +174,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp) { - srcu_read_release(sp); + rcu_lock_release(&(sp)->dep_map); __srcu_read_unlock(sp, idx); } -- cgit v1.2.3 From 867f236bd12f5091df6dc7cc75f94d7fd982d78a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Oct 2011 18:22:05 +0200 Subject: rcu: Make srcu_read_lock_held() call common lockdep-enabled function A common debug_lockdep_rcu_enabled() function is used to check whether RCU lockdep splats should be reported, but srcu_read_lock() does not use it. This commit therefore brings srcu_read_lock_held() up to date. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4e0a3d41dae3..d4b12443b2ef 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -84,6 +84,9 @@ long srcu_batches_completed(struct srcu_struct *sp); * this assumes we are in an SRCU read-side critical section unless it can * prove otherwise. * + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. + * * Note that if the CPU is in the idle loop from an RCU point of view * (ie: that we are in the section between rcu_idle_enter() and * rcu_idle_exit()) then srcu_read_lock_held() returns false even if @@ -102,7 +105,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) if (rcu_is_cpu_idle()) return 0; - if (!debug_locks) + if (!debug_lockdep_rcu_enabled()) return 1; return lock_is_held(&sp->dep_map); -- cgit v1.2.3 From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop to start the dytick idle mode - From interrupt exit if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only few minor differences between both that are handled by that function, driven by the ts->inidle cpu variable and the inidle parameter. The whole guarantees that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter in RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit() which only updates the dynticks idle mode when ts->inidle is set (ie: if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed into tick_nohz_idle_exit(). This simplifies the code and micro-optimize the irq exit path (no need for local_irq_save there). This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/tick.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index ca40838fdfb7..0df1d50a408a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -121,21 +121,22 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void tick_nohz_stop_sched_tick(int inidle); -extern void tick_nohz_restart_sched_tick(void); +extern void tick_nohz_idle_enter(void); +extern void tick_nohz_idle_exit(void); +extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else -static inline void tick_nohz_stop_sched_tick(int inidle) +static inline void tick_nohz_idle_enter(void) { - if (inidle) - rcu_idle_enter(); + rcu_idle_enter(); } -static inline void tick_nohz_restart_sched_tick(void) +static inline void tick_nohz_idle_exit(void) { rcu_idle_exit(); } + static inline ktime_t tick_nohz_get_sleep_length(void) { ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; -- cgit v1.2.3 From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Oct 2011 16:01:00 +0200 Subject: nohz: Allow rcu extended quiescent state handling seperately from tick stop It is assumed that rcu won't be used once we switch to tickless mode and until we restart the tick. However this is not always true, as in x86-64 where we dereference the idle notifiers after the tick is stopped. To prepare for fixing this, add two new APIs: tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu(). If no use of RCU is made in the idle loop between tick_nohz_enter_idle() and tick_nohz_exit_idle() calls, the arch must instead call the new *_norcu() version such that the arch doesn't need to call rcu_idle_enter() and rcu_idle_exit(). Otherwise the arch must call tick_nohz_enter_idle() and tick_nohz_exit_idle() and also call explicitly: - rcu_idle_enter() after its last use of RCU before the CPU is put to sleep. - rcu_idle_exit() before the first use of RCU after the CPU is woken up. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 0df1d50a408a..327434a05757 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -7,6 +7,7 @@ #define _LINUX_TICK_H #include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -121,18 +122,57 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void tick_nohz_idle_enter(void); +extern void __tick_nohz_idle_enter(void); +static inline void tick_nohz_idle_enter(void) +{ + local_irq_disable(); + __tick_nohz_idle_enter(); + local_irq_enable(); +} extern void tick_nohz_idle_exit(void); + +/* + * Call this pair of function if the arch doesn't make any use + * of RCU in-between. You won't need to call rcu_idle_enter() and + * rcu_idle_exit(). + * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit() + * and explicitly tell RCU about the window around the place the CPU enters low + * power mode where no RCU use is made. This is done by calling rcu_idle_enter() + * after the last use of RCU before the CPU is put to sleep and by calling + * rcu_idle_exit() before the first use of RCU after the CPU woke up. + */ +static inline void tick_nohz_idle_enter_norcu(void) +{ + /* + * Also call rcu_idle_enter() in the irq disabled section even + * if it disables irq itself. + * Just an optimization that prevents from an interrupt happening + * between it and __tick_nohz_idle_enter() to lose time to help + * completing a grace period while we could be in extended grace + * period already. + */ + local_irq_disable(); + __tick_nohz_idle_enter(); + rcu_idle_enter(); + local_irq_enable(); +} +static inline void tick_nohz_idle_exit_norcu(void) +{ + rcu_idle_exit(); + tick_nohz_idle_exit(); +} extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else -static inline void tick_nohz_idle_enter(void) +static inline void tick_nohz_idle_enter(void) { } +static inline void tick_nohz_idle_exit(void) { } +static inline void tick_nohz_idle_enter_norcu(void) { rcu_idle_enter(); } -static inline void tick_nohz_idle_exit(void) +static inline void tick_nohz_idle_exit_norcu(void) { rcu_idle_exit(); } -- cgit v1.2.3 From 0c53dd8b31404c1d7fd15be8f065ebaec615a562 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Oct 2011 15:13:11 -0700 Subject: rcu: Introduce raw SRCU read-side primitives The RCU implementations, including SRCU, are designed to be used in a lock-like fashion, so that the read-side lock and unlock primitives must execute in the same context for any given read-side critical section. This constraint is enforced by lockdep-RCU. However, there is a need to enter an SRCU read-side critical section within the context of an exception and then exit in the context of the task that encountered the exception. The cost of this capability is that the read-side operations incur the overhead of disabling interrupts. Note that although the current implementation allows a given read-side critical section to be entered by one task and then exited by another, all known possible implementations that allow this have scalability problems. Therefore, a given read-side critical section must be exited by the same task that entered it, though perhaps from an interrupt or exception handler running within that task's context. But if you are thinking in terms of interrupt handlers, make sure that you have considered the possibility of threaded interrupt handlers. Credit goes to Peter Zijlstra for suggesting use of the existing _raw suffix to indicate disabling lockdep over the earlier "bulkref" names. Requested-by: Srikar Dronamraju Signed-off-by: Paul E. McKenney Tested-by: Srikar Dronamraju --- include/linux/srcu.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index d4b12443b2ef..1eb520cd1680 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -181,4 +181,47 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/** + * srcu_read_lock_raw - register a new reader for an SRCU-protected structure. + * @sp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section. Similar to srcu_read_lock(), + * but avoids the RCU-lockdep checking. This means that it is legal to + * use srcu_read_lock_raw() in one context, for example, in an exception + * handler, and then have the matching srcu_read_unlock_raw() in another + * context, for example in the task that took the exception. + * + * However, the entire SRCU read-side critical section must reside within a + * single task. For example, beware of using srcu_read_lock_raw() in + * a device interrupt handler and srcu_read_unlock() in the interrupted + * task: This will not work if interrupts are threaded. + */ +static inline int srcu_read_lock_raw(struct srcu_struct *sp) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __srcu_read_lock(sp); + local_irq_restore(flags); + return ret; +} + +/** + * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure. + * @sp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock_raw(). + * + * Exit an SRCU read-side critical section without lockdep-RCU checking. + * See srcu_read_lock_raw() for more details. + */ +static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx) +{ + unsigned long flags; + + local_irq_save(flags); + __srcu_read_unlock(sp, idx); + local_irq_restore(flags); +} + #endif -- cgit v1.2.3 From c4f3060843506ba6d473ab9a0afe5bd5dc93a00d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Nov 2011 12:41:56 -0800 Subject: sched: Add is_idle_task() to handle invalidated uses of idle_cpu() Commit 908a3283 (Fix idle_cpu()) invalidated some uses of idle_cpu(), which used to say whether or not the CPU was running the idle task, but now instead says whether or not the CPU is running the idle task in the absence of pending wakeups. Although this new implementation gives a better answer to the question "is this CPU idle?", it also invalidates other uses that were made of idle_cpu(). This commit therefore introduces a new is_idle_task() API member that determines whether or not the specified task is one of the idle tasks, allowing open-coded "->pid == 0" sequences to be replaced by something more meaningful. Suggested-by: Josh Triplett Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9b9bc5..4a7e4d333a27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2070,6 +2070,14 @@ extern int sched_setscheduler(struct task_struct *, int, extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern struct task_struct *idle_task(int cpu); +/** + * is_idle_task - is the specified task an idle task? + * @tsk: the task in question. + */ +static inline bool is_idle_task(struct task_struct *p) +{ + return p->pid == 0; +} extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); -- cgit v1.2.3 From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Nov 2011 18:48:14 +0100 Subject: nohz: Remove tick_nohz_idle_enter_norcu() / tick_nohz_idle_exit_norcu() Those two APIs were provided to optimize the calls of tick_nohz_idle_enter() and rcu_idle_enter() into a single irq disabled section. This way no interrupt happening in-between would needlessly process any RCU job. Now we are talking about an optimization for which benefits have yet to be measured. Let's start simple and completely decouple idle rcu and dyntick idle logics to simplify. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Reviewed-by: Josh Triplett Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 47 +---------------------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 327434a05757..ab8be90b5cc9 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,45 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void __tick_nohz_idle_enter(void); -static inline void tick_nohz_idle_enter(void) -{ - local_irq_disable(); - __tick_nohz_idle_enter(); - local_irq_enable(); -} +extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); - -/* - * Call this pair of function if the arch doesn't make any use - * of RCU in-between. You won't need to call rcu_idle_enter() and - * rcu_idle_exit(). - * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit() - * and explicitly tell RCU about the window around the place the CPU enters low - * power mode where no RCU use is made. This is done by calling rcu_idle_enter() - * after the last use of RCU before the CPU is put to sleep and by calling - * rcu_idle_exit() before the first use of RCU after the CPU woke up. - */ -static inline void tick_nohz_idle_enter_norcu(void) -{ - /* - * Also call rcu_idle_enter() in the irq disabled section even - * if it disables irq itself. - * Just an optimization that prevents from an interrupt happening - * between it and __tick_nohz_idle_enter() to lose time to help - * completing a grace period while we could be in extended grace - * period already. - */ - local_irq_disable(); - __tick_nohz_idle_enter(); - rcu_idle_enter(); - local_irq_enable(); -} -static inline void tick_nohz_idle_exit_norcu(void) -{ - rcu_idle_exit(); - tick_nohz_idle_exit(); -} extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); @@ -168,14 +131,6 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -static inline void tick_nohz_idle_enter_norcu(void) -{ - rcu_idle_enter(); -} -static inline void tick_nohz_idle_exit_norcu(void) -{ - rcu_idle_exit(); -} static inline ktime_t tick_nohz_get_sleep_length(void) { -- cgit v1.2.3 From 3842a0832a1d6eb0b31421f8810a813135967512 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Nov 2011 10:42:42 -0800 Subject: rcu: Document same-context read-side constraints The intent is that a given RCU read-side critical section be confined to a single context. For example, it is illegal to invoke rcu_read_lock() in an exception handler and then invoke rcu_read_unlock() from the context of the task that received the exception. Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++++++ include/linux/srcu.h | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5dd6fd8b3203..81c04f4348ec 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -265,6 +265,11 @@ extern int debug_lockdep_rcu_enabled(void); * * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. + * + * Note that rcu_read_lock() and the matching rcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock() in process context if the matching rcu_read_lock() + * was invoked from within an irq handler. */ static inline int rcu_read_lock_held(void) { @@ -689,6 +694,11 @@ static inline void rcu_read_unlock(void) * critical sections in interrupt context can use just rcu_read_lock(), * though this should at least be commented to avoid confusing people * reading the code. + * + * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() + * must occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() + * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) { @@ -716,6 +726,11 @@ static inline void rcu_read_unlock_bh(void) * are being done using call_rcu_sched() or synchronize_rcu_sched(). * Read-side critical sections can also be introduced by anything that * disables preemption, including local_irq_disable() and friends. + * + * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() + * must occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock_sched() from process context if the matching + * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 1eb520cd1680..e1b005918bbb 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -158,6 +158,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * one way to indirectly wait on an SRCU grace period is to acquire * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). + * + * Note that srcu_read_lock() and the matching srcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() + * was invoked in process context. */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { -- cgit v1.2.3 From 2987557f52b97f679f0c324d8f51b8d66e1f2084 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 3 Dec 2011 13:06:50 -0800 Subject: driver-core/cpu: Expose hotpluggability to the rest of the kernel When architectures register CPUs, they indicate whether the CPU allows hotplugging; notably, x86 and ARM don't allow hotplugging CPU 0. Userspace can easily query the hotpluggability of a CPU via sysfs; however, the kernel has no convenient way of accessing that property in an architecture-independent way. While the kernel can simply try it and see, some code needs to distinguish between "hotplug failed" and "hotplug has no hope of working on this CPU"; for example, rcutorture's CPU hotplug tests want to avoid drowning out real hotplug failures with expected failures. Expose this property via a new cpu_is_hotpluggable function, so that the rest of the kernel can access it in an architecture-independent way. Signed-off-by: Josh Triplett Signed-off-by: Paul E. McKenney --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6cb60fd2ea84..305c263021e7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -27,6 +27,7 @@ struct cpu { extern int register_cpu(struct cpu *cpu, int num); extern struct sys_device *get_cpu_sysdev(unsigned cpu); +extern bool cpu_is_hotpluggable(unsigned cpu); extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); -- cgit v1.2.3 From dfd56b8b38fff3586f36232db58e1e9f7885a605 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 10 Dec 2011 09:48:31 +0000 Subject: net: use IS_ENABLED(CONFIG_IPV6) Instead of testing defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/errqueue.h | 4 ++-- include/linux/ipv6.h | 4 ++-- include/linux/lockd/lockd.h | 6 +++--- include/linux/sunrpc/clnt.h | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index c9f522bd17e4..fd0628be45ce 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -25,7 +25,7 @@ struct sock_extended_err { #ifdef __KERNEL__ #include -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -34,7 +34,7 @@ struct sock_extended_err { struct sock_exterr_skb { union { struct inet_skb_parm h4; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0c997767429a..6318268dcaf5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -404,7 +404,7 @@ struct tcp6_sock { extern int inet6_sk_rebuild_header(struct sock *sk); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return inet_sk(__sk)->pinet6; @@ -515,7 +515,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif /* IS_ENABLED(CONFIG_IPV6) */ #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff9abff55aa0..90b0656a869e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -301,7 +301,7 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap) return ipv4_is_loopback(sin->sin_addr.s_addr); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static inline int __nlm_privileged_request6(const struct sockaddr *sap) { const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; @@ -314,12 +314,12 @@ static inline int __nlm_privileged_request6(const struct sockaddr *sap) return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; } -#else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#else /* IS_ENABLED(CONFIG_IPV6) */ static inline int __nlm_privileged_request6(const struct sockaddr *sap) { return 0; } -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif /* IS_ENABLED(CONFIG_IPV6) */ /* * Ensure incoming requests are from local privileged callers. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index f15fd985b08a..2c5993a17c33 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -215,7 +215,7 @@ static inline bool __rpc_copy_addr4(struct sockaddr *dst, return true; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { @@ -240,7 +240,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, dsin6->sin6_addr = ssin6->sin6_addr; return true; } -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +#else /* !(IS_ENABLED(CONFIG_IPV6) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { @@ -252,7 +252,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, { return false; } -#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ /** * rpc_cmp_addr - compare the address portion of two sockaddrs. -- cgit v1.2.3 From a469ebd56f8bee8d5352b1a284ea39d23ba02430 Mon Sep 17 00:00:00 2001 From: Mark Einon Date: Tue, 6 Dec 2011 23:18:14 +0000 Subject: types.h: fix comment spelling for 'architectures' Spelling change, architetures -> architectures Signed-off-by: Mark Einon Signed-off-by: Jiri Kosina --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 57a97234bec1..cbcef6ebeba9 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -188,7 +188,7 @@ typedef __u32 __bitwise __wsum; * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid * common 32/64-bit compat problems. * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other - * architectures) and to 8-byte boundaries on 64-bit architetures. The new + * architectures) and to 8-byte boundaries on 64-bit architectures. The new * aligned_64 type enforces 8-byte alignment so that structs containing * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. * No conversions are necessary between 32-bit user-space and a 64-bit kernel. -- cgit v1.2.3 From 15c9a0acc3f7873db4b7d35d016729b2dc229b49 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 12 Dec 2011 09:25:57 -0700 Subject: of: create of_phandle_args to simplify return of phandle parsing data of_parse_phandle_with_args() needs to return quite a bit of data. Rather than making each datum a separate **out_ argument, this patch creates struct of_phandle_args to contain all the returned data and reworks the user of the function. This patch also enables of_parse_phandle_with_args() to return the device node pointer for the phandle node. This patch also ends up being fairly major surgery to of_parse_handle_with_args(). The existing structure didn't work well when extending to use of_phandle_args, and I discovered bugs during testing. I also took the opportunity to rename the function to be like the existing of_parse_phandle(). v2: - moved declaration of of_phandle_args to fix compile on non-DT builds - fixed incorrect index in example usage - fixed incorrect return code handling for empty entries Reviewed-by: Shawn Guo Signed-off-by: Grant Likely --- include/linux/of.h | 11 +++++++++-- include/linux/of_gpio.h | 10 ++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 4948552d60f5..ea44fd72af5f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -65,6 +65,13 @@ struct device_node { #endif }; +#define MAX_PHANDLE_ARGS 8 +struct of_phandle_args { + struct device_node *np; + int args_count; + uint32_t args[MAX_PHANDLE_ARGS]; +}; + #ifdef CONFIG_OF /* Pointer for first entry in chain of all nodes. */ @@ -230,9 +237,9 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len); extern struct device_node *of_parse_phandle(struct device_node *np, const char *phandle_name, int index); -extern int of_parse_phandles_with_args(struct device_node *np, +extern int of_parse_phandle_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, - struct device_node **out_node, const void **out_args); + struct of_phandle_args *out_args); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 52280a2b5e63..b254052a49d7 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -18,6 +18,7 @@ #include #include #include +#include struct device_node; @@ -57,8 +58,9 @@ extern int of_mm_gpiochip_add(struct device_node *np, extern void of_gpiochip_add(struct gpio_chip *gc); extern void of_gpiochip_remove(struct gpio_chip *gc); extern struct gpio_chip *of_node_to_gpiochip(struct device_node *np); -extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np, - const void *gpio_spec, u32 *flags); +extern int of_gpio_simple_xlate(struct gpio_chip *gc, + const struct of_phandle_args *gpiospec, + u32 *flags); #else /* CONFIG_OF_GPIO */ @@ -75,8 +77,8 @@ static inline unsigned int of_gpio_count(struct device_node *np) } static inline int of_gpio_simple_xlate(struct gpio_chip *gc, - struct device_node *np, - const void *gpio_spec, u32 *flags) + const struct of_phandle_args *gpiospec, + u32 *flags) { return -ENOSYS; } -- cgit v1.2.3 From e1aab161e0135aafcd439be20b4f35e4b0922d95 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sun, 11 Dec 2011 21:47:03 +0000 Subject: socket: initial cgroup code. The goal of this work is to move the memory pressure tcp controls to a cgroup, instead of just relying on global conditions. To avoid excessive overhead in the network fast paths, the code that accounts allocated memory to a cgroup is hidden inside a static_branch(). This branch is patched out until the first non-root cgroup is created. So when nobody is using cgroups, even if it is mounted, no significant performance penalty should be seen. This patch handles the generic part of the code, and has nothing tcp-specific. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki CC: Kirill A. Shutemov CC: David S. Miller CC: Eric W. Biederman CC: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/memcontrol.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b87068a1a09e..f15021b9f734 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,6 +85,8 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); +extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); + static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { @@ -381,5 +383,25 @@ mem_cgroup_print_bad_page(struct page *page) } #endif +#ifdef CONFIG_INET +enum { + UNDER_LIMIT, + SOFT_LIMIT, + OVER_LIMIT, +}; + +struct sock; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +void sock_update_memcg(struct sock *sk); +void sock_release_memcg(struct sock *sk); +#else +static inline void sock_update_memcg(struct sock *sk) +{ +} +static inline void sock_release_memcg(struct sock *sk) +{ +} +#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_INET */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From d1a4c0b37c296e600ffe08edb0db2dc1b8f550d7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sun, 11 Dec 2011 21:47:04 +0000 Subject: tcp memory pressure controls This patch introduces memory pressure controls for the tcp protocol. It uses the generic socket memory pressure code introduced in earlier patches, and fills in the necessary data in cg_proto struct. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki CC: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f15021b9f734..1513994ce207 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); +extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) -- cgit v1.2.3 From 90b41a1cd44cc4e507b554ae5a36562a1ba9a4e8 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 12 Dec 2011 14:30:00 +0000 Subject: netem: add cell concept to simulate special MAC behavior This extension can be used to simulate special link layer characteristics. Simulate because packet data is not modified, only the calculation base is changed to delay a packet based on the original packet size and artificial cell information. packet_overhead can be used to simulate a link layer header compression scheme (e.g. set packet_overhead to -20) or with a positive packet_overhead value an additional MAC header can be simulated. It is also possible to "replace" the 14 byte Ethernet header with something else. cell_size and cell_overhead can be used to simulate link layer schemes, based on cells, like some TDMA schemes. Another application area are MAC schemes using a link layer fragmentation with a (small) header each. Cell size is the maximum amount of data bytes within one cell. Cell overhead is an additional variable to change the per-cell-overhead (e.g. 5 byte header per fragment). Example (5 kbit/s, 20 byte per packet overhead, cell-size 100 byte, per cell overhead 5 byte): tc qdisc add dev eth0 root netem rate 5kbit 20 100 5 Signed-off-by: Hagen Paul Pfeifer Signed-off-by: Florian Westphal Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 8786ea741f52..8daced32a014 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -502,6 +502,9 @@ struct tc_netem_corrupt { struct tc_netem_rate { __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; }; enum { -- cgit v1.2.3 From 13c07b0286d340275f2d97adf085cecda37ede37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 12 Dec 2011 22:06:55 -0800 Subject: linux/log2.h: Fix rounddown_pow_of_two(1) Exactly like roundup_pow_of_two(1), the rounddown version was buggy for the case of a compile-time constant '1' argument. Probably because it originated from the same code, sharing history with the roundup version from before the bugfix (for that one, see commit 1a06a52ee1b0: "Fix roundup_pow_of_two(1)"). However, unlike the roundup version, the fix for rounddown is to just remove the broken special case entirely. It's simply not needed - the generic code 1UL << ilog2(n) does the right thing for the constant '1' argment too. The only reason roundup needed that special case was because rounding up does so by subtracting one from the argument (and then adding one to the result) causing the obvious problems with "ilog2(0)". But rounddown doesn't do any of that, since ilog2() naturally truncates (ie "rounds down") to the right rounded down value. And without the ilog2(0) case, there's no reason for the special case that had the wrong value. tl;dr: rounddown_pow_of_two(1) should be 1, not 0. Acked-by: Dmitry Torokhov Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/log2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 25b808631cd9..fd7ff3d91e6a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define rounddown_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 0 : \ (1UL << ilog2(n))) : \ __rounddown_pow_of_two(n) \ ) -- cgit v1.2.3 From 4af679cd7cbb0a0d8774b5cdb34bffcaa4e86e52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Dec 2011 10:36:20 +0100 Subject: kref: Inline all functions These are tiny functions, there's no point in having them out-of-line. Cc: Alexey Dobriyan Cc: Eric Dumazet Cc: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8eccvi2ur2fzgi00xdjlbf5z@git.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index d4a62ab2ee5e..1cbae9f2ef77 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -16,15 +16,85 @@ #define _KREF_H_ #include +#include struct kref { atomic_t refcount; }; -void kref_init(struct kref *kref); -void kref_get(struct kref *kref); -int kref_put(struct kref *kref, void (*release) (struct kref *kref)); -int kref_sub(struct kref *kref, unsigned int count, - void (*release) (struct kref *kref)); +/** + * kref_init - initialize object. + * @kref: object in question. + */ +static inline void kref_init(struct kref *kref) +{ + atomic_set(&kref->refcount, 1); + smp_mb(); +} + +/** + * kref_get - increment refcount for object. + * @kref: object. + */ +static inline void kref_get(struct kref *kref) +{ + WARN_ON(!atomic_read(&kref->refcount)); + atomic_inc(&kref->refcount); + smp_mb__after_atomic_inc(); +} + +/** + * kref_put - decrement refcount for object. + * @kref: object. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Decrement the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_dec_and_test(&kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +static inline int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} #endif /* _KREF_H_ */ -- cgit v1.2.3 From 47dbd7d90ad80edb67822f327241edcab8f3f46f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 10 Dec 2011 11:43:43 +0100 Subject: kref: Implement kref_put in terms of kref_sub Less lines of code is better. Cc: Alexey Dobriyan Cc: Eric Dumazet Cc: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 1cbae9f2ef77..fa9907a541e2 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -44,57 +44,49 @@ static inline void kref_get(struct kref *kref) } /** - * kref_put - decrement refcount for object. + * kref_sub - subtract a number of refcounts for object. * @kref: object. + * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree * in as this function. * - * Decrement the refcount, and if 0, call release(). + * Subtract @count from the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) +static inline int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) { WARN_ON(release == NULL); WARN_ON(release == (void (*)(struct kref *))kfree); - if (atomic_dec_and_test(&kref->refcount)) { + if (atomic_sub_and_test((int) count, &kref->refcount)) { release(kref); return 1; } return 0; } - /** - * kref_sub - subtract a number of refcounts for object. + * kref_put - decrement refcount for object. * @kref: object. - * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree * in as this function. * - * Subtract @count from the refcount, and if 0, call release(). + * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_sub(struct kref *kref, unsigned int count, - void (*release)(struct kref *kref)) +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { - WARN_ON(release == NULL); - WARN_ON(release == (void (*)(struct kref *))kfree); - - if (atomic_sub_and_test((int) count, &kref->refcount)) { - release(kref); - return 1; - } - return 0; + return kref_sub(kref, 1, release); } #endif /* _KREF_H_ */ -- cgit v1.2.3 From 3c8ed88974472b928489e3943616500ce2ad0cd8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 10 Dec 2011 11:43:44 +0100 Subject: kref: Remove the memory barriers Commit 1b0b3b9980e ("kref: fix CPU ordering with respect to krefs") wrongly adds memory barriers to kref. It states: some atomic operations are only atomic, not ordered. Thus a CPU is allowed to reorder memory references to an object to before the reference is obtained. This fixes it. While true, it fails to show why this is a problem. I say it is not a problem because if there is a race with kref_put() such that we could end up referencing a free'd object without this memory barrier, we would still have that race with the memory barrier. The kref_put() in question could complete (and free the object) before the atomic_inc() and we'd still be up shit creek. The kref_init() case is even worse, if your object is published at this time you're so wrong the memory barrier won't make a difference what so ever. If its not published, the act of publishing should include the needed barriers/locks to make sure all writes prior to the act of publishing are complete such that others will only observe a complete object. Cc: Alexey Dobriyan Cc: Eric Dumazet Cc: Ingo Molnar Cc: Oliver Neukum Signed-off-by: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index fa9907a541e2..d66c88a3b48c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -29,7 +29,6 @@ struct kref { static inline void kref_init(struct kref *kref) { atomic_set(&kref->refcount, 1); - smp_mb(); } /** @@ -40,7 +39,6 @@ static inline void kref_get(struct kref *kref) { WARN_ON(!atomic_read(&kref->refcount)); atomic_inc(&kref->refcount); - smp_mb__after_atomic_inc(); } /** -- cgit v1.2.3 From 623ed84b1f9553bc962c2aca92f488aa6f27ecd1 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 13 Dec 2011 04:10:33 +0000 Subject: mlx4_core: initial header-file changes for SRIOV support These changes will not affect module operation as yet. They are only to get some structs and enums in place for use by subsequent patches (making those smaller). Added here: * sriov state structs and inlines (mlx4_is_master/slave/mfunc) * comm-channel and vhcr support structures * enum values for new FW and comm-channel virtual commands (i.e., commands, passed via the comm channel to the PF-driver). * prototypes for many command wrapper functions (used by the PF context for processing FW commands passed to it by the VFs). * struct mlx4_eqe is moved from eq.c to mlx4.h (it will be used by other mlx4_core source files). Signed-off-by: Jack Morgenstein Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 29 ++++++++++++++++++++++++++++- include/linux/mlx4/device.h | 42 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index b56e4587208d..e8e92814c8a0 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -59,12 +59,15 @@ enum { MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_SET_NODE = 0x5a, + MLX4_CMD_QUERY_FUNC = 0x56, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, MLX4_CMD_UNMAP_ICM = 0xff9, MLX4_CMD_MAP_ICM_AUX = 0xffc, MLX4_CMD_UNMAP_ICM_AUX = 0xffb, MLX4_CMD_SET_ICM_SIZE = 0xffd, + /*master notify fw on finish for slave's flr*/ + MLX4_CMD_INFORM_FLR_DONE = 0x5b, /* TPT commands */ MLX4_CMD_SW2HW_MPT = 0xd, @@ -119,6 +122,26 @@ enum { /* miscellaneous commands */ MLX4_CMD_DIAG_RPRT = 0x30, MLX4_CMD_NOP = 0x31, + MLX4_CMD_ACCESS_MEM = 0x2e, + MLX4_CMD_SET_VEP = 0x52, + + /* Ethernet specific commands */ + MLX4_CMD_SET_VLAN_FLTR = 0x47, + MLX4_CMD_SET_MCAST_FLTR = 0x48, + MLX4_CMD_DUMP_ETH_STATS = 0x49, + + /* Communication channel commands */ + MLX4_CMD_ARM_COMM_CHANNEL = 0x57, + MLX4_CMD_GEN_EQE = 0x58, + + /* virtual commands */ + MLX4_CMD_ALLOC_RES = 0xf00, + MLX4_CMD_FREE_RES = 0xf01, + MLX4_CMD_MCAST_ATTACH = 0xf05, + MLX4_CMD_UCAST_ATTACH = 0xf06, + MLX4_CMD_PROMISC = 0xf08, + MLX4_CMD_QUERY_FUNC_CAP = 0xf0a, + MLX4_CMD_QP_ATTACH = 0xf0b, /* debug commands */ MLX4_CMD_QUERY_DEBUG_MSG = 0x2a, @@ -126,6 +149,7 @@ enum { /* statistics commands */ MLX4_CMD_QUERY_IF_STAT = 0X54, + MLX4_CMD_SET_IF_STAT = 0X55, }; enum { @@ -135,7 +159,8 @@ enum { }; enum { - MLX4_MAILBOX_SIZE = 4096 + MLX4_MAILBOX_SIZE = 4096, + MLX4_ACCESS_MEM_ALIGN = 256, }; enum { @@ -192,4 +217,6 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); +u32 mlx4_comm_get_version(void); + #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca2c39771c38..b9466af2348f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -47,6 +47,9 @@ enum { MLX4_FLAG_MSI_X = 1 << 0, MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, + MLX4_FLAG_MASTER = 1 << 2, + MLX4_FLAG_SLAVE = 1 << 3, + MLX4_FLAG_SRIOV = 1 << 4, }; enum { @@ -57,6 +60,15 @@ enum { MLX4_BOARD_ID_LEN = 64 }; +enum { + MLX4_MAX_NUM_PF = 16, + MLX4_MAX_NUM_VF = 64, + MLX4_MFUNC_MAX = 80, + MLX4_MFUNC_EQ_NUM = 4, + MLX4_MFUNC_MAX_EQES = 8, + MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) +}; + enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, @@ -117,7 +129,11 @@ enum mlx4_event { MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, - MLX4_EVENT_TYPE_CMD = 0x0a + MLX4_EVENT_TYPE_CMD = 0x0a, + MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, + MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, + MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, + MLX4_EVENT_TYPE_NONE = 0xff, }; enum { @@ -184,6 +200,7 @@ enum mlx4_qp_region { }; enum mlx4_port_type { + MLX4_PORT_TYPE_NONE = 0, MLX4_PORT_TYPE_IB = 1, MLX4_PORT_TYPE_ETH = 2, MLX4_PORT_TYPE_AUTO = 3 @@ -216,6 +233,7 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) struct mlx4_caps { u64 fw_ver; + u32 function; int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; @@ -466,6 +484,7 @@ struct mlx4_counter { struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; + unsigned long num_slaves; struct mlx4_caps caps; struct radix_tree_root qp_table_tree; u8 rev_id; @@ -494,8 +513,27 @@ struct mlx4_init_port_param { #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask & 1 << ((port) - 1)) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + +static inline int mlx4_is_master(struct mlx4_dev *dev) +{ + return dev->flags & MLX4_FLAG_MASTER; +} + +static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) +{ + return (qpn < dev->caps.sqp_start + 8); +} +static inline int mlx4_is_mfunc(struct mlx4_dev *dev) +{ + return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER); +} + +static inline int mlx4_is_slave(struct mlx4_dev *dev) +{ + return dev->flags & MLX4_FLAG_SLAVE; +} int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); -- cgit v1.2.3 From 65dab25deb8da7dba4b6dd0145a9143be7f8369f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 13 Dec 2011 04:10:41 +0000 Subject: mlx4: Extanding port_mask functionality Port mask now has additional state. Port can be set as "none". In this case neither the mlx4_en or mlx4_ib drivers take ownership of the port. In multifunction mode there is an option to set the vfs as single ported devices. (in single function mode, both physical ports belong to same function) Signed-off-by: Jack Morgenstein Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b9466af2348f..3333018d2913 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -302,7 +302,7 @@ struct mlx4_caps { int log_num_prios; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; - u32 port_mask; + u32 port_mask[MLX4_MAX_PORTS + 1]; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; u8 ext_port_cap[MLX4_MAX_PORTS + 1]; @@ -507,13 +507,12 @@ struct mlx4_init_port_param { #define mlx4_foreach_port(port, dev, type) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \ - ~(dev)->caps.port_mask) & 1 << ((port) - 1)) + if ((type) == (dev)->caps.port_mask[(port)]) -#define mlx4_foreach_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((dev)->caps.port_mask & 1 << ((port) - 1)) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) +#define mlx4_foreach_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) static inline int mlx4_is_master(struct mlx4_dev *dev) { -- cgit v1.2.3 From f9baff509f8a05a79626defdbdf4f4aa4efd373b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 13 Dec 2011 04:10:51 +0000 Subject: mlx4_core: Add "native" argument to mlx4_cmd and its callers (where needed) For SRIOV, some Hypervisor commands can be executed directly (native = 1). Others should go through the command wrapper flow (for tracking resource usage, for example, or for changing some HCA configurations that slaves need to be notified of). This patch sets the groundwork for this capability -- adding the correct value of "native" in each case. Note that if SRIOV is not activated, this parameter has no effect. Signed-off-by: Jack Morgenstein Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e8e92814c8a0..ae62630a665e 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -173,6 +173,11 @@ enum { MLX4_SET_PORT_GID_TABLE = 0x5, }; +enum { + MLX4_CMD_WRAPPED, + MLX4_CMD_NATIVE +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { @@ -182,23 +187,24 @@ struct mlx4_cmd_mailbox { int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, - u16 op, unsigned long timeout); + u16 op, unsigned long timeout, int native); /* Invoke a command with no output parameter */ static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned long timeout, + int native) { return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier, - op_modifier, op, timeout); + op_modifier, op, timeout, native); } /* Invoke a command with an output mailbox */ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned long timeout, int native) { return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier, - op_modifier, op, timeout); + op_modifier, op, timeout, native); } /* @@ -208,10 +214,10 @@ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param */ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned long timeout, int native) { return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier, - op_modifier, op, timeout); + op_modifier, op, timeout, native); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); -- cgit v1.2.3 From f5311ac109b21c9b47118655a5b6d887bcc686f8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 13 Dec 2011 04:12:13 +0000 Subject: mlx4_core: Reduce number of PD bits to 17 When SRIOV is enabled on the chip (at FW burning time), the HCA uses only 17 bits for the PD. The remaining 7 high-order bits are ignored. Change the allocator to return only 17 bits for the PD. The MSB 7 bits will be used to encode the slave number for consistency checking later on in the resource tracker. Signed-off-by: Jack Morgenstein Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3333018d2913..e4be34a908a7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -248,6 +248,7 @@ struct mlx4_caps { u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; + u32 uar_page_size; int bf_reg_size; int bf_regs_per_page; int max_sq_sg; -- cgit v1.2.3 From ffe455ad04681f3fc48eef595fe526a795f809a3 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 13 Dec 2011 04:16:21 +0000 Subject: mlx4: Ethernet port management modifications The physical port is now common to the PF and VFs. The port resources and configuration is managed by the PF, VFs can only influence the MTU of the port, it is set as max among all functions, Each function allocates RX buffers of required size to meet it's MTU enforcement. Port management code was moved to mlx4_core, as the mlx4_en module is virtualization unaware Move handling qp functionality to mlx4_get_eth_qp/mlx4_put_eth_qp including reserve/release range and add/release unicast steering. Let mlx4_register/unregister_mac deal only with MAC (un)registration. Signed-off-by: Eugenia Emantayev Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4be34a908a7..3ef73b05e24e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -599,6 +599,10 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); +int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback, enum mlx4_protocol prot); +int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol protocol); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], @@ -609,9 +613,11 @@ int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); -int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap); -void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn); -int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap); +int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); +void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); +int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); +int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn); +void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit v1.2.3 From 2b8fb2867ca2736a715a88067fd0ec2904777cbe Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 13 Dec 2011 04:16:56 +0000 Subject: mlx4_core: mtts resources units changed to offset In the previous implementation mtts are managed by: 1. order - log(mtt segments), 'mtt segment' groups several mtts together. 2. first_seg - segment location relative to mtt table. In the current implementation: 1. order - log(mtts) rather than segments 2. offset - mtt index in mtt table Note: The actual mtt allocation is made in segments but it is transparent to callers. Rational: The mtt resource holders are not interested on how the allocation of mtt is done, but rather on how they will use it. Signed-off-by: Marcel Apfelbaum Reviewed-by: Jack Morgenstein Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3ef73b05e24e..65bb466c575f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -272,8 +272,7 @@ struct mlx4_caps { int num_comp_vectors; int comp_pool; int num_mpts; - int num_mtt_segs; - int mtts_per_seg; + int num_mtts; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; @@ -323,7 +322,7 @@ struct mlx4_buf { }; struct mlx4_mtt { - u32 first_seg; + u32 offset; int order; int page_shift; }; -- cgit v1.2.3 From ab9c17a009ee8eb8c667f22dc0be0709effceab9 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 13 Dec 2011 04:18:30 +0000 Subject: mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet 1. Added module parameters sr_iov and probe_vf for controlling enablement of SRIOV mode. 2. Increased default max num-qps, num-mpts and log_num_macs to accomodate SRIOV mode 3. Added port_type_array as a module parameter to allow driver startup with ports configured as desired. In SRIOV mode, only ETH is supported, and this array is ignored; otherwise, for the case where the FW supports both port types (ETH and IB), the port_type_array parameter is used. By default, the port_type_array is set to configure both ports as IB. 4. When running in sriov mode, the master needs to initialize the ICM eq table to hold the eq's for itself and also for all the slaves. 5. mlx4_set_port_mask() now invoked from mlx4_init_hca, instead of in mlx4_dev_cap. 6. Introduced sriov VF (slave) device startup/teardown logic (mainly procedures mlx4_init_slave, mlx4_slave_exit, mlx4_slave_cap, mlx4_slave_exit and flow modifications in __mlx4_init_one, mlx4_init_hca, and mlx4_setup_hca). VFs obtain their startup information from the PF (master) device via the comm channel. 7. In SRIOV mode (both PF and VF), MSI_X must be enabled, or the driver aborts loading the device. 8. Do not allow setting port type via sysfs when running in SRIOV mode. 9. mlx4_get_ownership: Currently, only one PF is supported by the driver. If the HCA is burned with FW which enables more than one PF, only one of the PFs is allowed to run. The first one up grabs a FW ownership semaphone -- all other PFs will find that semaphore taken, and the driver will not allow them to run. Signed-off-by: Jack Morgenstein Signed-off-by: Yevgeny Petrilin Signed-off-by: Liran Liss Signed-off-by: Marcel Apfelbaum Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 2 ++ include/linux/mlx4/device.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ae62630a665e..9958ff2cad3c 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -225,4 +225,6 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); +#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) + #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 65bb466c575f..5f784ff6a36e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -489,6 +489,7 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; + int num_vfs; }; struct mlx4_init_port_param { -- cgit v1.2.3 From 8cb25e14fe80d0fac42412364df573eb3e8e83cc Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Thu, 8 Dec 2011 13:11:54 +0100 Subject: ieee80211: Introduce ieee80211_is_first_frag Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 17f2a768e2ad..210e2c325534 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -544,6 +544,15 @@ static inline int ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set + * @seq_ctrl: frame sequence control bytes in little-endian byteorder + */ +static inline int ieee80211_is_first_frag(__le16 seq_ctrl) +{ + return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; +} + struct ieee80211s_hdr { u8 flags; u8 ttl; -- cgit v1.2.3 From 8a5ac6ecd56756ee72588627aa23ab6cf9b790db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 8 Dec 2011 18:02:21 +0100 Subject: ssb: extract FEM info from SPROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/ssb/ssb.h | 9 +++++++++ include/linux/ssb/ssb_regs.h | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 061e560251b4..dcf35b0f303a 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -94,6 +94,15 @@ struct ssb_sprom { } ghz5; /* 5GHz band */ } antenna_gain; + struct { + struct { + u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut; + } ghz2; + struct { + u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut; + } ghz5; + } fem; + /* TODO - add any parameters needed from rev 2, 3, 4, 5 or 8 SPROMs */ }; diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 98941203a27f..c814ae6eeb22 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -432,6 +432,23 @@ #define SSB_SPROM8_RXPO2G 0x00FF /* 2GHz RX power offset */ #define SSB_SPROM8_RXPO5G 0xFF00 /* 5GHz RX power offset */ #define SSB_SPROM8_RXPO5G_SHIFT 8 +#define SSB_SPROM8_FEM2G 0x00AE +#define SSB_SPROM8_FEM5G 0x00B0 +#define SSB_SROM8_FEM_TSSIPOS 0x0001 +#define SSB_SROM8_FEM_TSSIPOS_SHIFT 0 +#define SSB_SROM8_FEM_EXTPA_GAIN 0x0006 +#define SSB_SROM8_FEM_EXTPA_GAIN_SHIFT 1 +#define SSB_SROM8_FEM_PDET_RANGE 0x00F8 +#define SSB_SROM8_FEM_PDET_RANGE_SHIFT 3 +#define SSB_SROM8_FEM_TR_ISO 0x0700 +#define SSB_SROM8_FEM_TR_ISO_SHIFT 8 +#define SSB_SROM8_FEM_ANTSWLUT 0xF800 +#define SSB_SROM8_FEM_ANTSWLUT_SHIFT 11 +#define SSB_SPROM8_THERMAL 0x00B2 +#define SSB_SPROM8_MPWR_RAWTS 0x00B4 +#define SSB_SPROM8_TS_SLP_OPT_CORRX 0x00B6 +#define SSB_SPROM8_FOC_HWIQ_IQSWP 0x00B8 +#define SSB_SPROM8_PHYCAL_TEMPDELTA 0x00BA #define SSB_SPROM8_MAXP_BG 0x00C0 /* Max Power 2GHz in path 1 */ #define SSB_SPROM8_MAXP_BG_MASK 0x00FF /* Mask for Max Power 2GHz */ #define SSB_SPROM8_ITSSI_BG 0xFF00 /* Mask for path 1 itssi_bg */ -- cgit v1.2.3 From aee5ed563d56c713d2a51d6f16e08b83fd9665d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 8 Dec 2011 18:02:22 +0100 Subject: bcma: extract FEM info from SPROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/bcma/bcma_driver_chipcommon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 1526d965ed06..a33086a7530b 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -203,6 +203,7 @@ #define BCMA_CC_PMU_CTL 0x0600 /* PMU control */ #define BCMA_CC_PMU_CTL_ILP_DIV 0xFFFF0000 /* ILP div mask */ #define BCMA_CC_PMU_CTL_ILP_DIV_SHIFT 16 +#define BCMA_CC_PMU_CTL_PLL_UPD 0x00000400 #define BCMA_CC_PMU_CTL_NOILPONW 0x00000200 /* No ILP on wait */ #define BCMA_CC_PMU_CTL_HTREQEN 0x00000100 /* HT req enable */ #define BCMA_CC_PMU_CTL_ALPREQEN 0x00000080 /* ALP req enable */ -- cgit v1.2.3 From 9d08f10d355afd500310738ff09b4d921a447102 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 8 Dec 2011 15:06:41 -0800 Subject: bcma: add set/mask macros for 16-bit register access The BCMA header only had definitions for 32-bit register access. Used those as a template for the 16-bit flavour. Also changed them to inline functions to be on the safe side. As offset parameter is used twice there would be a problem when used like this: bcma_set32(core, offset++, val); Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Alwin Beukers Signed-off-by: Arend van Spriel Signed-off-by: Franky Lin Signed-off-by: John W. Linville --- include/linux/bcma/bcma.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 4d4b59de9467..de6057f16987 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -254,12 +254,32 @@ void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value) core->bus->ops->awrite32(core, offset, value); } -#define bcma_mask32(cc, offset, mask) \ - bcma_write32(cc, offset, bcma_read32(cc, offset) & (mask)) -#define bcma_set32(cc, offset, set) \ - bcma_write32(cc, offset, bcma_read32(cc, offset) | (set)) -#define bcma_maskset32(cc, offset, mask, set) \ - bcma_write32(cc, offset, (bcma_read32(cc, offset) & (mask)) | (set)) +static inline void bcma_mask32(struct bcma_device *cc, u16 offset, u32 mask) +{ + bcma_write32(cc, offset, bcma_read32(cc, offset) & mask); +} +static inline void bcma_set32(struct bcma_device *cc, u16 offset, u32 set) +{ + bcma_write32(cc, offset, bcma_read32(cc, offset) | set); +} +static inline void bcma_maskset32(struct bcma_device *cc, + u16 offset, u32 mask, u32 set) +{ + bcma_write32(cc, offset, (bcma_read32(cc, offset) & mask) | set); +} +static inline void bcma_mask16(struct bcma_device *cc, u16 offset, u16 mask) +{ + bcma_write16(cc, offset, bcma_read16(cc, offset) & mask); +} +static inline void bcma_set16(struct bcma_device *cc, u16 offset, u16 set) +{ + bcma_write16(cc, offset, bcma_read16(cc, offset) | set); +} +static inline void bcma_maskset16(struct bcma_device *cc, + u16 offset, u16 mask, u16 set) +{ + bcma_write16(cc, offset, (bcma_read16(cc, offset) & mask) | set); +} extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); -- cgit v1.2.3 From 084455524f0d46dd210b4397898aff73579b97e8 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 8 Dec 2011 15:06:42 -0800 Subject: bcma: use static keyword for inline function declaration in bcma.h Just scratching an itch here, but it makes more sense to use the static keyword if you think about how the compiler treats inline functions. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Alwin Beukers Signed-off-by: Arend van Spriel Signed-off-by: Franky Lin Signed-off-by: John W. Linville --- include/linux/bcma/bcma.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index de6057f16987..f4b8346b1a33 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,50 +205,51 @@ struct bcma_bus { struct ssb_sprom sprom; }; -extern inline u32 bcma_read8(struct bcma_device *core, u16 offset) +static inline u32 bcma_read8(struct bcma_device *core, u16 offset) { return core->bus->ops->read8(core, offset); } -extern inline u32 bcma_read16(struct bcma_device *core, u16 offset) +static inline u32 bcma_read16(struct bcma_device *core, u16 offset) { return core->bus->ops->read16(core, offset); } -extern inline u32 bcma_read32(struct bcma_device *core, u16 offset) +static inline u32 bcma_read32(struct bcma_device *core, u16 offset) { return core->bus->ops->read32(core, offset); } -extern inline +static inline void bcma_write8(struct bcma_device *core, u16 offset, u32 value) { core->bus->ops->write8(core, offset, value); } -extern inline +static inline void bcma_write16(struct bcma_device *core, u16 offset, u32 value) { core->bus->ops->write16(core, offset, value); } -extern inline +static inline void bcma_write32(struct bcma_device *core, u16 offset, u32 value) { core->bus->ops->write32(core, offset, value); } #ifdef CONFIG_BCMA_BLOCKIO -extern inline void bcma_block_read(struct bcma_device *core, void *buffer, +static inline void bcma_block_read(struct bcma_device *core, void *buffer, size_t count, u16 offset, u8 reg_width) { core->bus->ops->block_read(core, buffer, count, offset, reg_width); } -extern inline void bcma_block_write(struct bcma_device *core, const void *buffer, - size_t count, u16 offset, u8 reg_width) +static inline void bcma_block_write(struct bcma_device *core, + const void *buffer, size_t count, + u16 offset, u8 reg_width) { core->bus->ops->block_write(core, buffer, count, offset, reg_width); } #endif -extern inline u32 bcma_aread32(struct bcma_device *core, u16 offset) +static inline u32 bcma_aread32(struct bcma_device *core, u16 offset) { return core->bus->ops->aread32(core, offset); } -extern inline +static inline void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value) { core->bus->ops->awrite32(core, offset, value); -- cgit v1.2.3 From 5c3ddec73d01a1fae9409c197078cb02c42238c3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Dec 2011 16:44:22 -0500 Subject: net: Remove unused neighbour layer ops. It's simpler to just keep these things out until there is a real user of them, so we can see what the needs actually are, rather than keep these things around as useless overhead. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 603730804da5..6b9d4edb7c26 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,7 +974,6 @@ struct net_device_ops { int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); }; /* -- cgit v1.2.3 From 84c99db879314d58e0064f02b481f668f45d0070 Mon Sep 17 00:00:00 2001 From: Ashish Jangam Date: Mon, 12 Dec 2011 20:06:56 +0530 Subject: MFD: DA9052/53 MFD core module The DA9052/53 is a highly integrated PMIC subsystem with supply domain flexibility to support wide range of high performance application. It provides voltage regulators, GPIO controller, Touch Screen, RTC, Battery control and other functionality. This patch is functionally tested on Samsung SMDKV6410. Signed-off-by: David Dajun Chen Signed-off-by: Ashish Jangam Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- include/linux/mfd/da9052/da9052.h | 129 +++++++ include/linux/mfd/da9052/pdata.h | 40 ++ include/linux/mfd/da9052/reg.h | 749 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 918 insertions(+) create mode 100644 include/linux/mfd/da9052/da9052.h create mode 100644 include/linux/mfd/da9052/pdata.h create mode 100644 include/linux/mfd/da9052/reg.h (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h new file mode 100644 index 000000000000..c8899ab20549 --- /dev/null +++ b/include/linux/mfd/da9052/da9052.h @@ -0,0 +1,129 @@ +/* + * da9052 declarations for DA9052 PMICs. + * + * Copyright(c) 2011 Dialog Semiconductor Ltd. + * + * Author: David Dajun Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __MFD_DA9052_DA9052_H +#define __MFD_DA9052_DA9052_H + +#include +#include +#include +#include +#include +#include + +#include + +#define DA9052_IRQ_DCIN 0 +#define DA9052_IRQ_VBUS 1 +#define DA9052_IRQ_DCINREM 2 +#define DA9052_IRQ_VBUSREM 3 +#define DA9052_IRQ_VDDLOW 4 +#define DA9052_IRQ_ALARM 5 +#define DA9052_IRQ_SEQRDY 6 +#define DA9052_IRQ_COMP1V2 7 +#define DA9052_IRQ_NONKEY 8 +#define DA9052_IRQ_IDFLOAT 9 +#define DA9052_IRQ_IDGND 10 +#define DA9052_IRQ_CHGEND 11 +#define DA9052_IRQ_TBAT 12 +#define DA9052_IRQ_ADC_EOM 13 +#define DA9052_IRQ_PENDOWN 14 +#define DA9052_IRQ_TSIREADY 15 +#define DA9052_IRQ_GPI0 16 +#define DA9052_IRQ_GPI1 17 +#define DA9052_IRQ_GPI2 18 +#define DA9052_IRQ_GPI3 19 +#define DA9052_IRQ_GPI4 20 +#define DA9052_IRQ_GPI5 21 +#define DA9052_IRQ_GPI6 22 +#define DA9052_IRQ_GPI7 23 +#define DA9052_IRQ_GPI8 24 +#define DA9052_IRQ_GPI9 25 +#define DA9052_IRQ_GPI10 26 +#define DA9052_IRQ_GPI11 27 +#define DA9052_IRQ_GPI12 28 +#define DA9052_IRQ_GPI13 29 +#define DA9052_IRQ_GPI14 30 +#define DA9052_IRQ_GPI15 31 + +enum da9052_chip_id { + DA9052, + DA9053_AA, + DA9053_BA, + DA9053_BB, +}; + +struct da9052_pdata; + +struct da9052 { + struct mutex io_lock; + + struct device *dev; + struct regmap *regmap; + + int irq_base; + u8 chip_id; + + int chip_irq; +}; + +/* Device I/O API */ +static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg) +{ + int val, ret; + + ret = regmap_read(da9052->regmap, reg, &val); + if (ret < 0) + return ret; + return val; +} + +static inline int da9052_reg_write(struct da9052 *da9052, unsigned char reg, + unsigned char val) +{ + return regmap_write(da9052->regmap, reg, val); +} + +static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg, + unsigned reg_cnt, unsigned char *val) +{ + return regmap_bulk_read(da9052->regmap, reg, val, reg_cnt); +} + +static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg, + unsigned reg_cnt, unsigned char *val) +{ + return regmap_raw_write(da9052->regmap, reg, val, reg_cnt); +} + +static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg, + unsigned char bit_mask, + unsigned char reg_val) +{ + return regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val); +} + +int da9052_device_init(struct da9052 *da9052, u8 chip_id); +void da9052_device_exit(struct da9052 *da9052); + +#endif /* __MFD_DA9052_DA9052_H */ diff --git a/include/linux/mfd/da9052/pdata.h b/include/linux/mfd/da9052/pdata.h new file mode 100644 index 000000000000..62c5c3c2992e --- /dev/null +++ b/include/linux/mfd/da9052/pdata.h @@ -0,0 +1,40 @@ +/* + * Platform data declarations for DA9052 PMICs. + * + * Copyright(c) 2011 Dialog Semiconductor Ltd. + * + * Author: David Dajun Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __MFD_DA9052_PDATA_H__ +#define __MFD_DA9052_PDATA_H__ + +#define DA9052_MAX_REGULATORS 14 + +struct da9052; + +struct da9052_pdata { + struct led_platform_data *pled; + int (*init) (struct da9052 *da9052); + int irq_base; + int gpio_base; + int use_for_apm; + struct regulator_init_data *regulators[DA9052_MAX_REGULATORS]; +}; + +#endif diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h new file mode 100644 index 000000000000..b97f7309d7f6 --- /dev/null +++ b/include/linux/mfd/da9052/reg.h @@ -0,0 +1,749 @@ +/* + * Register declarations for DA9052 PMICs. + * + * Copyright(c) 2011 Dialog Semiconductor Ltd. + * + * Author: David Dajun Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_MFD_DA9052_REG_H +#define __LINUX_MFD_DA9052_REG_H + +/* PAGE REGISTERS */ +#define DA9052_PAGE0_CON_REG 0 +#define DA9052_PAGE1_CON_REG 128 + +/* STATUS REGISTERS */ +#define DA9052_STATUS_A_REG 1 +#define DA9052_STATUS_B_REG 2 +#define DA9052_STATUS_C_REG 3 +#define DA9052_STATUS_D_REG 4 + +/* EVENT REGISTERS */ +#define DA9052_EVENT_A_REG 5 +#define DA9052_EVENT_B_REG 6 +#define DA9052_EVENT_C_REG 7 +#define DA9052_EVENT_D_REG 8 +#define DA9052_FAULTLOG_REG 9 + +/* IRQ REGISTERS */ +#define DA9052_IRQ_MASK_A_REG 10 +#define DA9052_IRQ_MASK_B_REG 11 +#define DA9052_IRQ_MASK_C_REG 12 +#define DA9052_IRQ_MASK_D_REG 13 + +/* CONTROL REGISTERS */ +#define DA9052_CONTROL_A_REG 14 +#define DA9052_CONTROL_B_REG 15 +#define DA9052_CONTROL_C_REG 16 +#define DA9052_CONTROL_D_REG 17 + +#define DA9052_PDDIS_REG 18 +#define DA9052_INTERFACE_REG 19 +#define DA9052_RESET_REG 20 + +/* GPIO REGISTERS */ +#define DA9052_GPIO_0_1_REG 21 +#define DA9052_GPIO_2_3_REG 22 +#define DA9052_GPIO_4_5_REG 23 +#define DA9052_GPIO_6_7_REG 24 +#define DA9052_GPIO_14_15_REG 28 + +/* POWER SEQUENCER CONTROL REGISTERS */ +#define DA9052_ID_0_1_REG 29 +#define DA9052_ID_2_3_REG 30 +#define DA9052_ID_4_5_REG 31 +#define DA9052_ID_6_7_REG 32 +#define DA9052_ID_8_9_REG 33 +#define DA9052_ID_10_11_REG 34 +#define DA9052_ID_12_13_REG 35 +#define DA9052_ID_14_15_REG 36 +#define DA9052_ID_16_17_REG 37 +#define DA9052_ID_18_19_REG 38 +#define DA9052_ID_20_21_REG 39 +#define DA9052_SEQ_STATUS_REG 40 +#define DA9052_SEQ_A_REG 41 +#define DA9052_SEQ_B_REG 42 +#define DA9052_SEQ_TIMER_REG 43 + +/* LDO AND BUCK REGISTERS */ +#define DA9052_BUCKA_REG 44 +#define DA9052_BUCKB_REG 45 +#define DA9052_BUCKCORE_REG 46 +#define DA9052_BUCKPRO_REG 47 +#define DA9052_BUCKMEM_REG 48 +#define DA9052_BUCKPERI_REG 49 +#define DA9052_LDO1_REG 50 +#define DA9052_LDO2_REG 51 +#define DA9052_LDO3_REG 52 +#define DA9052_LDO4_REG 53 +#define DA9052_LDO5_REG 54 +#define DA9052_LDO6_REG 55 +#define DA9052_LDO7_REG 56 +#define DA9052_LDO8_REG 57 +#define DA9052_LDO9_REG 58 +#define DA9052_LDO10_REG 59 +#define DA9052_SUPPLY_REG 60 +#define DA9052_PULLDOWN_REG 61 +#define DA9052_CHGBUCK_REG 62 +#define DA9052_WAITCONT_REG 63 +#define DA9052_ISET_REG 64 +#define DA9052_BATCHG_REG 65 + +/* BATTERY CONTROL REGISTRS */ +#define DA9052_CHG_CONT_REG 66 +#define DA9052_INPUT_CONT_REG 67 +#define DA9052_CHG_TIME_REG 68 +#define DA9052_BBAT_CONT_REG 69 + +/* LED CONTROL REGISTERS */ +#define DA9052_BOOST_REG 70 +#define DA9052_LED_CONT_REG 71 +#define DA9052_LEDMIN123_REG 72 +#define DA9052_LED1_CONF_REG 73 +#define DA9052_LED2_CONF_REG 74 +#define DA9052_LED3_CONF_REG 75 +#define DA9052_LED1CONT_REG 76 +#define DA9052_LED2CONT_REG 77 +#define DA9052_LED3CONT_REG 78 +#define DA9052_LED_CONT_4_REG 79 +#define DA9052_LED_CONT_5_REG 80 + +/* ADC CONTROL REGISTERS */ +#define DA9052_ADC_MAN_REG 81 +#define DA9052_ADC_CONT_REG 82 +#define DA9052_ADC_RES_L_REG 83 +#define DA9052_ADC_RES_H_REG 84 +#define DA9052_VDD_RES_REG 85 +#define DA9052_VDD_MON_REG 86 + +#define DA9052_ICHG_AV_REG 87 +#define DA9052_ICHG_THD_REG 88 +#define DA9052_ICHG_END_REG 89 +#define DA9052_TBAT_RES_REG 90 +#define DA9052_TBAT_HIGHP_REG 91 +#define DA9052_TBAT_HIGHN_REG 92 +#define DA9052_TBAT_LOW_REG 93 +#define DA9052_T_OFFSET_REG 94 + +#define DA9052_ADCIN4_RES_REG 95 +#define DA9052_AUTO4_HIGH_REG 96 +#define DA9052_AUTO4_LOW_REG 97 +#define DA9052_ADCIN5_RES_REG 98 +#define DA9052_AUTO5_HIGH_REG 99 +#define DA9052_AUTO5_LOW_REG 100 +#define DA9052_ADCIN6_RES_REG 101 +#define DA9052_AUTO6_HIGH_REG 102 +#define DA9052_AUTO6_LOW_REG 103 + +#define DA9052_TJUNC_RES_REG 104 + +/* TSI CONTROL REGISTERS */ +#define DA9052_TSI_CONT_A_REG 105 +#define DA9052_TSI_CONT_B_REG 106 +#define DA9052_TSI_X_MSB_REG 107 +#define DA9052_TSI_Y_MSB_REG 108 +#define DA9052_TSI_LSB_REG 109 +#define DA9052_TSI_Z_MSB_REG 110 + +/* RTC COUNT REGISTERS */ +#define DA9052_COUNT_S_REG 111 +#define DA9052_COUNT_MI_REG 112 +#define DA9052_COUNT_H_REG 113 +#define DA9052_COUNT_D_REG 114 +#define DA9052_COUNT_MO_REG 115 +#define DA9052_COUNT_Y_REG 116 + +/* RTC CONTROL REGISTERS */ +#define DA9052_ALARM_MI_REG 117 +#define DA9052_ALARM_H_REG 118 +#define DA9052_ALARM_D_REG 119 +#define DA9052_ALARM_MO_REG 120 +#define DA9052_ALARM_Y_REG 121 +#define DA9052_SECOND_A_REG 122 +#define DA9052_SECOND_B_REG 123 +#define DA9052_SECOND_C_REG 124 +#define DA9052_SECOND_D_REG 125 + +/* PAGE CONFIGURATION BIT */ +#define DA9052_PAGE_CONF 0X80 + +/* STATUS REGISTER A BITS */ +#define DA9052_STATUSA_VDATDET 0X80 +#define DA9052_STATUSA_VBUSSEL 0X40 +#define DA9052_STATUSA_DCINSEL 0X20 +#define DA9052_STATUSA_VBUSDET 0X10 +#define DA9052_STATUSA_DCINDET 0X08 +#define DA9052_STATUSA_IDGND 0X04 +#define DA9052_STATUSA_IDFLOAT 0X02 +#define DA9052_STATUSA_NONKEY 0X01 + +/* STATUS REGISTER B BITS */ +#define DA9052_STATUSB_COMPDET 0X80 +#define DA9052_STATUSB_SEQUENCING 0X40 +#define DA9052_STATUSB_GPFB2 0X20 +#define DA9052_STATUSB_CHGTO 0X10 +#define DA9052_STATUSB_CHGEND 0X08 +#define DA9052_STATUSB_CHGLIM 0X04 +#define DA9052_STATUSB_CHGPRE 0X02 +#define DA9052_STATUSB_CHGATT 0X01 + +/* STATUS REGISTER C BITS */ +#define DA9052_STATUSC_GPI7 0X80 +#define DA9052_STATUSC_GPI6 0X40 +#define DA9052_STATUSC_GPI5 0X20 +#define DA9052_STATUSC_GPI4 0X10 +#define DA9052_STATUSC_GPI3 0X08 +#define DA9052_STATUSC_GPI2 0X04 +#define DA9052_STATUSC_GPI1 0X02 +#define DA9052_STATUSC_GPI0 0X01 + +/* STATUS REGISTER D BITS */ +#define DA9052_STATUSD_GPI15 0X80 +#define DA9052_STATUSD_GPI14 0X40 +#define DA9052_STATUSD_GPI13 0X20 +#define DA9052_STATUSD_GPI12 0X10 +#define DA9052_STATUSD_GPI11 0X08 +#define DA9052_STATUSD_GPI10 0X04 +#define DA9052_STATUSD_GPI9 0X02 +#define DA9052_STATUSD_GPI8 0X01 + +/* EVENT REGISTER A BITS */ +#define DA9052_EVENTA_ECOMP1V2 0X80 +#define DA9052_EVENTA_ESEQRDY 0X40 +#define DA9052_EVENTA_EALRAM 0X20 +#define DA9052_EVENTA_EVDDLOW 0X10 +#define DA9052_EVENTA_EVBUSREM 0X08 +#define DA9052_EVENTA_EDCINREM 0X04 +#define DA9052_EVENTA_EVBUSDET 0X02 +#define DA9052_EVENTA_EDCINDET 0X01 + +/* EVENT REGISTER B BITS */ +#define DA9052_EVENTB_ETSIREADY 0X80 +#define DA9052_EVENTB_EPENDOWN 0X40 +#define DA9052_EVENTB_EADCEOM 0X20 +#define DA9052_EVENTB_ETBAT 0X10 +#define DA9052_EVENTB_ECHGEND 0X08 +#define DA9052_EVENTB_EIDGND 0X04 +#define DA9052_EVENTB_EIDFLOAT 0X02 +#define DA9052_EVENTB_ENONKEY 0X01 + +/* EVENT REGISTER C BITS */ +#define DA9052_EVENTC_EGPI7 0X80 +#define DA9052_EVENTC_EGPI6 0X40 +#define DA9052_EVENTC_EGPI5 0X20 +#define DA9052_EVENTC_EGPI4 0X10 +#define DA9052_EVENTC_EGPI3 0X08 +#define DA9052_EVENTC_EGPI2 0X04 +#define DA9052_EVENTC_EGPI1 0X02 +#define DA9052_EVENTC_EGPI0 0X01 + +/* EVENT REGISTER D BITS */ +#define DA9052_EVENTD_EGPI15 0X80 +#define DA9052_EVENTD_EGPI14 0X40 +#define DA9052_EVENTD_EGPI13 0X20 +#define DA9052_EVENTD_EGPI12 0X10 +#define DA9052_EVENTD_EGPI11 0X08 +#define DA9052_EVENTD_EGPI10 0X04 +#define DA9052_EVENTD_EGPI9 0X02 +#define DA9052_EVENTD_EGPI8 0X01 + +/* IRQ MASK REGISTERS BITS */ +#define DA9052_M_NONKEY 0X0100 + +/* TSI EVENT REGISTERS BITS */ +#define DA9052_E_PEN_DOWN 0X4000 +#define DA9052_E_TSI_READY 0X8000 + +/* FAULT LOG REGISTER BITS */ +#define DA9052_FAULTLOG_WAITSET 0X80 +#define DA9052_FAULTLOG_NSDSET 0X40 +#define DA9052_FAULTLOG_KEYSHUT 0X20 +#define DA9052_FAULTLOG_TEMPOVER 0X08 +#define DA9052_FAULTLOG_VDDSTART 0X04 +#define DA9052_FAULTLOG_VDDFAULT 0X02 +#define DA9052_FAULTLOG_TWDERROR 0X01 + +/* CONTROL REGISTER A BITS */ +#define DA9052_CONTROLA_GPIV 0X80 +#define DA9052_CONTROLA_PMOTYPE 0X20 +#define DA9052_CONTROLA_PMOV 0X10 +#define DA9052_CONTROLA_PMIV 0X08 +#define DA9052_CONTROLA_PMIFV 0X08 +#define DA9052_CONTROLA_PWR1EN 0X04 +#define DA9052_CONTROLA_PWREN 0X02 +#define DA9052_CONTROLA_SYSEN 0X01 + +/* CONTROL REGISTER B BITS */ +#define DA9052_CONTROLB_SHUTDOWN 0X80 +#define DA9052_CONTROLB_DEEPSLEEP 0X40 +#define DA9052_CONTROL_B_WRITEMODE 0X20 +#define DA9052_CONTROLB_BBATEN 0X10 +#define DA9052_CONTROLB_OTPREADEN 0X08 +#define DA9052_CONTROLB_AUTOBOOT 0X04 +#define DA9052_CONTROLB_ACTDIODE 0X02 +#define DA9052_CONTROLB_BUCKMERGE 0X01 + +/* CONTROL REGISTER C BITS */ +#define DA9052_CONTROLC_BLINKDUR 0X80 +#define DA9052_CONTROLC_BLINKFRQ 0X60 +#define DA9052_CONTROLC_DEBOUNCING 0X1C +#define DA9052_CONTROLC_PMFB2PIN 0X02 +#define DA9052_CONTROLC_PMFB1PIN 0X01 + +/* CONTROL REGISTER D BITS */ +#define DA9052_CONTROLD_WATCHDOG 0X80 +#define DA9052_CONTROLD_ACCDETEN 0X40 +#define DA9052_CONTROLD_GPI1415SD 0X20 +#define DA9052_CONTROLD_NONKEYSD 0X10 +#define DA9052_CONTROLD_KEEPACTEN 0X08 +#define DA9052_CONTROLD_TWDSCALE 0X07 + +/* POWER DOWN DISABLE REGISTER BITS */ +#define DA9052_PDDIS_PMCONTPD 0X80 +#define DA9052_PDDIS_OUT32KPD 0X40 +#define DA9052_PDDIS_CHGBBATPD 0X20 +#define DA9052_PDDIS_CHGPD 0X10 +#define DA9052_PDDIS_HS2WIREPD 0X08 +#define DA9052_PDDIS_PMIFPD 0X04 +#define DA9052_PDDIS_GPADCPD 0X02 +#define DA9052_PDDIS_GPIOPD 0X01 + +/* CONTROL REGISTER D BITS */ +#define DA9052_INTERFACE_IFBASEADDR 0XE0 +#define DA9052_INTERFACE_NCSPOL 0X10 +#define DA9052_INTERFACE_RWPOL 0X08 +#define DA9052_INTERFACE_CPHA 0X04 +#define DA9052_INTERFACE_CPOL 0X02 +#define DA9052_INTERFACE_IFTYPE 0X01 + +/* CONTROL REGISTER D BITS */ +#define DA9052_RESET_RESETEVENT 0XC0 +#define DA9052_RESET_RESETTIMER 0X3F + +/* GPIO REGISTERS */ +/* GPIO CONTROL REGISTER BITS */ +#define DA9052_GPIO_EVEN_PORT_PIN 0X03 +#define DA9052_GPIO_EVEN_PORT_TYPE 0X04 +#define DA9052_GPIO_EVEN_PORT_MODE 0X08 + +#define DA9052_GPIO_ODD_PORT_PIN 0X30 +#define DA9052_GPIO_ODD_PORT_TYPE 0X40 +#define DA9052_GPIO_ODD_PORT_MODE 0X80 + +/*POWER SEQUENCER REGISTER BITS */ +/* SEQ CONTROL REGISTER BITS FOR ID 0 AND 1 */ +#define DA9052_ID01_LDO1STEP 0XF0 +#define DA9052_ID01_SYSPRE 0X04 +#define DA9052_ID01_DEFSUPPLY 0X02 +#define DA9052_ID01_NRESMODE 0X01 + +/* SEQ CONTROL REGISTER BITS FOR ID 2 AND 3 */ +#define DA9052_ID23_LDO3STEP 0XF0 +#define DA9052_ID23_LDO2STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 4 AND 5 */ +#define DA9052_ID45_LDO5STEP 0XF0 +#define DA9052_ID45_LDO4STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 6 AND 7 */ +#define DA9052_ID67_LDO7STEP 0XF0 +#define DA9052_ID67_LDO6STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 8 AND 9 */ +#define DA9052_ID89_LDO9STEP 0XF0 +#define DA9052_ID89_LDO8STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 10 AND 11 */ +#define DA9052_ID1011_PDDISSTEP 0XF0 +#define DA9052_ID1011_LDO10STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 12 AND 13 */ +#define DA9052_ID1213_VMEMSWSTEP 0XF0 +#define DA9052_ID1213_VPERISWSTEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 14 AND 15 */ +#define DA9052_ID1415_BUCKPROSTEP 0XF0 +#define DA9052_ID1415_BUCKCORESTEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 16 AND 17 */ +#define DA9052_ID1617_BUCKPERISTEP 0XF0 +#define DA9052_ID1617_BUCKMEMSTEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 18 AND 19 */ +#define DA9052_ID1819_GPRISE2STEP 0XF0 +#define DA9052_ID1819_GPRISE1STEP 0X0F + +/* SEQ CONTROL REGISTER BITS FOR ID 20 AND 21 */ +#define DA9052_ID2021_GPFALL2STEP 0XF0 +#define DA9052_ID2021_GPFALL1STEP 0X0F + +/* POWER SEQ STATUS REGISTER BITS */ +#define DA9052_SEQSTATUS_SEQPOINTER 0XF0 +#define DA9052_SEQSTATUS_WAITSTEP 0X0F + +/* POWER SEQ A REGISTER BITS */ +#define DA9052_SEQA_POWEREND 0XF0 +#define DA9052_SEQA_SYSTEMEND 0X0F + +/* POWER SEQ B REGISTER BITS */ +#define DA9052_SEQB_PARTDOWN 0XF0 +#define DA9052_SEQB_MAXCOUNT 0X0F + +/* POWER SEQ TIMER REGISTER BITS */ +#define DA9052_SEQTIMER_SEQDUMMY 0XF0 +#define DA9052_SEQTIMER_SEQTIME 0X0F + +/*POWER SUPPLY CONTROL REGISTER BITS */ +/* BUCK REGISTER A BITS */ +#define DA9052_BUCKA_BPROILIM 0XC0 +#define DA9052_BUCKA_BPROMODE 0X30 +#define DA9052_BUCKA_BCOREILIM 0X0C +#define DA9052_BUCKA_BCOREMODE 0X03 + +/* BUCK REGISTER B BITS */ +#define DA9052_BUCKB_BERIILIM 0XC0 +#define DA9052_BUCKB_BPERIMODE 0X30 +#define DA9052_BUCKB_BMEMILIM 0X0C +#define DA9052_BUCKB_BMEMMODE 0X03 + +/* BUCKCORE REGISTER BITS */ +#define DA9052_BUCKCORE_BCORECONF 0X80 +#define DA9052_BUCKCORE_BCOREEN 0X40 +#define DA9052_BUCKCORE_VBCORE 0X3F + +/* BUCKPRO REGISTER BITS */ +#define DA9052_BUCKPRO_BPROCONF 0X80 +#define DA9052_BUCKPRO_BPROEN 0X40 +#define DA9052_BUCKPRO_VBPRO 0X3F + +/* BUCKMEM REGISTER BITS */ +#define DA9052_BUCKMEM_BMEMCONF 0X80 +#define DA9052_BUCKMEM_BMEMEN 0X40 +#define DA9052_BUCKMEM_VBMEM 0X3F + +/* BUCKPERI REGISTER BITS */ +#define DA9052_BUCKPERI_BPERICONF 0X80 +#define DA9052_BUCKPERI_BPERIEN 0X40 +#define DA9052_BUCKPERI_BPERIHS 0X20 +#define DA9052_BUCKPERI_VBPERI 0X1F + +/* LDO1 REGISTER BITS */ +#define DA9052_LDO1_LDO1CONF 0X80 +#define DA9052_LDO1_LDO1EN 0X40 +#define DA9052_LDO1_VLDO1 0X1F + +/* LDO2 REGISTER BITS */ +#define DA9052_LDO2_LDO2CONF 0X80 +#define DA9052_LDO2_LDO2EN 0X40 +#define DA9052_LDO2_VLDO2 0X3F + +/* LDO3 REGISTER BITS */ +#define DA9052_LDO3_LDO3CONF 0X80 +#define DA9052_LDO3_LDO3EN 0X40 +#define DA9052_LDO3_VLDO3 0X3F + +/* LDO4 REGISTER BITS */ +#define DA9052_LDO4_LDO4CONF 0X80 +#define DA9052_LDO4_LDO4EN 0X40 +#define DA9052_LDO4_VLDO4 0X3F + +/* LDO5 REGISTER BITS */ +#define DA9052_LDO5_LDO5CONF 0X80 +#define DA9052_LDO5_LDO5EN 0X40 +#define DA9052_LDO5_VLDO5 0X3F + +/* LDO6 REGISTER BITS */ +#define DA9052_LDO6_LDO6CONF 0X80 +#define DA9052_LDO6_LDO6EN 0X40 +#define DA9052_LDO6_VLDO6 0X3F + +/* LDO7 REGISTER BITS */ +#define DA9052_LDO7_LDO7CONF 0X80 +#define DA9052_LDO7_LDO7EN 0X40 +#define DA9052_LDO7_VLDO7 0X3F + +/* LDO8 REGISTER BITS */ +#define DA9052_LDO8_LDO8CONF 0X80 +#define DA9052_LDO8_LDO8EN 0X40 +#define DA9052_LDO8_VLDO8 0X3F + +/* LDO9 REGISTER BITS */ +#define DA9052_LDO9_LDO9CONF 0X80 +#define DA9052_LDO9_LDO9EN 0X40 +#define DA9052_LDO9_VLDO9 0X3F + +/* LDO10 REGISTER BITS */ +#define DA9052_LDO10_LDO10CONF 0X80 +#define DA9052_LDO10_LDO10EN 0X40 +#define DA9052_LDO10_VLDO10 0X3F + +/* SUPPLY REGISTER BITS */ +#define DA9052_SUPPLY_VLOCK 0X80 +#define DA9052_SUPPLY_VMEMSWEN 0X40 +#define DA9052_SUPPLY_VPERISWEN 0X20 +#define DA9052_SUPPLY_VLDO3GO 0X10 +#define DA9052_SUPPLY_VLDO2GO 0X08 +#define DA9052_SUPPLY_VBMEMGO 0X04 +#define DA9052_SUPPLY_VBPROGO 0X02 +#define DA9052_SUPPLY_VBCOREGO 0X01 + +/* PULLDOWN REGISTER BITS */ +#define DA9052_PULLDOWN_LDO5PDDIS 0X20 +#define DA9052_PULLDOWN_LDO2PDDIS 0X10 +#define DA9052_PULLDOWN_LDO1PDDIS 0X08 +#define DA9052_PULLDOWN_MEMPDDIS 0X04 +#define DA9052_PULLDOWN_PROPDDIS 0X02 +#define DA9052_PULLDOWN_COREPDDIS 0X01 + +/* BAT CHARGER REGISTER BITS */ +/* CHARGER BUCK REGISTER BITS */ +#define DA9052_CHGBUCK_CHGTEMP 0X80 +#define DA9052_CHGBUCK_CHGUSBILIM 0X40 +#define DA9052_CHGBUCK_CHGBUCKLP 0X20 +#define DA9052_CHGBUCK_CHGBUCKEN 0X10 +#define DA9052_CHGBUCK_ISETBUCK 0X0F + +/* WAIT COUNTER REGISTER BITS */ +#define DA9052_WAITCONT_WAITDIR 0X80 +#define DA9052_WAITCONT_RTCCLOCK 0X40 +#define DA9052_WAITCONT_WAITMODE 0X20 +#define DA9052_WAITCONT_EN32KOUT 0X10 +#define DA9052_WAITCONT_DELAYTIME 0X0F + +/* ISET CONTROL REGISTER BITS */ +#define DA9052_ISET_ISETDCIN 0XF0 +#define DA9052_ISET_ISETVBUS 0X0F + +/* BATTERY CHARGER CONTROL REGISTER BITS */ +#define DA9052_BATCHG_ICHGPRE 0XC0 +#define DA9052_BATCHG_ICHGBAT 0X3F + +/* CHARGER COUNTER REGISTER BITS */ +#define DA9052_CHG_CONT_VCHG_BAT 0XF8 +#define DA9052_CHG_CONT_TCTR 0X07 + +/* INPUT CONTROL REGISTER BITS */ +#define DA9052_INPUT_CONT_TCTR_MODE 0X80 +#define DA9052_INPUT_CONT_VBUS_SUSP 0X10 +#define DA9052_INPUT_CONT_DCIN_SUSP 0X08 + +/* CHARGING TIME REGISTER BITS */ +#define DA9052_CHGTIME_CHGTIME 0XFF + +/* BACKUP BATTERY CONTROL REGISTER BITS */ +#define DA9052_BBATCONT_BCHARGERISET 0XF0 +#define DA9052_BBATCONT_BCHARGERVSET 0X0F + +/* LED REGISTERS BITS */ +/* LED BOOST REGISTER BITS */ +#define DA9052_BOOST_EBFAULT 0X80 +#define DA9052_BOOST_MBFAULT 0X40 +#define DA9052_BOOST_BOOSTFRQ 0X20 +#define DA9052_BOOST_BOOSTILIM 0X10 +#define DA9052_BOOST_LED3INEN 0X08 +#define DA9052_BOOST_LED2INEN 0X04 +#define DA9052_BOOST_LED1INEN 0X02 +#define DA9052_BOOST_BOOSTEN 0X01 + +/* LED CONTROL REGISTER BITS */ +#define DA9052_LEDCONT_SELLEDMODE 0X80 +#define DA9052_LEDCONT_LED3ICONT 0X40 +#define DA9052_LEDCONT_LED3RAMP 0X20 +#define DA9052_LEDCONT_LED3EN 0X10 +#define DA9052_LEDCONT_LED2RAMP 0X08 +#define DA9052_LEDCONT_LED2EN 0X04 +#define DA9052_LEDCONT_LED1RAMP 0X02 +#define DA9052_LEDCONT_LED1EN 0X01 + +/* LEDMIN123 REGISTER BIT */ +#define DA9052_LEDMIN123_LEDMINCURRENT 0XFF + +/* LED1CONF REGISTER BIT */ +#define DA9052_LED1CONF_LED1CURRENT 0XFF + +/* LED2CONF REGISTER BIT */ +#define DA9052_LED2CONF_LED2CURRENT 0XFF + +/* LED3CONF REGISTER BIT */ +#define DA9052_LED3CONF_LED3CURRENT 0XFF + +/* LED COUNT REGISTER BIT */ +#define DA9052_LED_CONT_DIM 0X80 + +/* ADC MAN REGISTERS BITS */ +#define DA9052_ADC_MAN_MAN_CONV 0X10 +#define DA9052_ADC_MAN_MUXSEL_VDDOUT 0X00 +#define DA9052_ADC_MAN_MUXSEL_ICH 0X01 +#define DA9052_ADC_MAN_MUXSEL_TBAT 0X02 +#define DA9052_ADC_MAN_MUXSEL_VBAT 0X03 +#define DA9052_ADC_MAN_MUXSEL_AD4 0X04 +#define DA9052_ADC_MAN_MUXSEL_AD5 0X05 +#define DA9052_ADC_MAN_MUXSEL_AD6 0X06 +#define DA9052_ADC_MAN_MUXSEL_VBBAT 0X09 + +/* ADC CONTROL REGSISTERS BITS */ +#define DA9052_ADCCONT_COMP1V2EN 0X80 +#define DA9052_ADCCONT_ADCMODE 0X40 +#define DA9052_ADCCONT_TBATISRCEN 0X20 +#define DA9052_ADCCONT_AD4ISRCEN 0X10 +#define DA9052_ADCCONT_AUTOAD6EN 0X08 +#define DA9052_ADCCONT_AUTOAD5EN 0X04 +#define DA9052_ADCCONT_AUTOAD4EN 0X02 +#define DA9052_ADCCONT_AUTOVDDEN 0X01 + +/* ADC 10 BIT MANUAL CONVERSION RESULT LOW REGISTER */ +#define DA9052_ADC_RES_LSB 0X03 + +/* ADC 10 BIT MANUAL CONVERSION RESULT HIGH REGISTER */ +#define DA9052_ADCRESH_ADCRESMSB 0XFF + +/* VDD RES REGSISTER BIT*/ +#define DA9052_VDDRES_VDDOUTRES 0XFF + +/* VDD MON REGSISTER BIT */ +#define DA9052_VDDMON_VDDOUTMON 0XFF + +/* ICHG_AV REGSISTER BIT */ +#define DA9052_ICHGAV_ICHGAV 0XFF + +/* ICHG_THD REGSISTER BIT */ +#define DA9052_ICHGTHD_ICHGTHD 0XFF + +/* ICHG_END REGSISTER BIT */ +#define DA9052_ICHGEND_ICHGEND 0XFF + +/* TBAT_RES REGSISTER BIT */ +#define DA9052_TBATRES_TBATRES 0XFF + +/* TBAT_HIGHP REGSISTER BIT */ +#define DA9052_TBATHIGHP_TBATHIGHP 0XFF + +/* TBAT_HIGHN REGSISTER BIT */ +#define DA9052_TBATHIGHN_TBATHIGHN 0XFF + +/* TBAT_LOW REGSISTER BIT */ +#define DA9052_TBATLOW_TBATLOW 0XFF + +/* T_OFFSET REGSISTER BIT */ +#define DA9052_TOFFSET_TOFFSET 0XFF + +/* ADCIN4_RES REGSISTER BIT */ +#define DA9052_ADCIN4RES_ADCIN4RES 0XFF + +/* ADCIN4_HIGH REGSISTER BIT */ +#define DA9052_AUTO4HIGH_AUTO4HIGH 0XFF + +/* ADCIN4_LOW REGSISTER BIT */ +#define DA9052_AUTO4LOW_AUTO4LOW 0XFF + +/* ADCIN5_RES REGSISTER BIT */ +#define DA9052_ADCIN5RES_ADCIN5RES 0XFF + +/* ADCIN5_HIGH REGSISTER BIT */ +#define DA9052_AUTO5HIGH_AUTOHIGH 0XFF + +/* ADCIN5_LOW REGSISTER BIT */ +#define DA9052_AUTO5LOW_AUTO5LOW 0XFF + +/* ADCIN6_RES REGSISTER BIT */ +#define DA9052_ADCIN6RES_ADCIN6RES 0XFF + +/* ADCIN6_HIGH REGSISTER BIT */ +#define DA9052_AUTO6HIGH_AUTO6HIGH 0XFF + +/* ADCIN6_LOW REGSISTER BIT */ +#define DA9052_AUTO6LOW_AUTO6LOW 0XFF + +/* TJUNC_RES REGSISTER BIT*/ +#define DA9052_TJUNCRES_TJUNCRES 0XFF + +/* TSI REGISTER */ +/* TSI CONTROL REGISTER A BITS */ +#define DA9052_TSICONTA_TSIDELAY 0XC0 +#define DA9052_TSICONTA_TSISKIP 0X38 +#define DA9052_TSICONTA_TSIMODE 0X04 +#define DA9052_TSICONTA_PENDETEN 0X02 +#define DA9052_TSICONTA_AUTOTSIEN 0X01 + +/* TSI CONTROL REGISTER B BITS */ +#define DA9052_TSICONTB_ADCREF 0X80 +#define DA9052_TSICONTB_TSIMAN 0X40 +#define DA9052_TSICONTB_TSIMUX 0X30 +#define DA9052_TSICONTB_TSISEL3 0X08 +#define DA9052_TSICONTB_TSISEL2 0X04 +#define DA9052_TSICONTB_TSISEL1 0X02 +#define DA9052_TSICONTB_TSISEL0 0X01 + +/* TSI X CO-ORDINATE MSB RESULT REGISTER BITS */ +#define DA9052_TSIXMSB_TSIXM 0XFF + +/* TSI Y CO-ORDINATE MSB RESULT REGISTER BITS */ +#define DA9052_TSIYMSB_TSIYM 0XFF + +/* TSI CO-ORDINATE LSB RESULT REGISTER BITS */ +#define DA9052_TSILSB_PENDOWN 0X40 +#define DA9052_TSILSB_TSIZL 0X30 +#define DA9052_TSILSB_TSIYL 0X0C +#define DA9052_TSILSB_TSIXL 0X03 + +/* TSI Z MEASUREMENT MSB RESULT REGISTER BIT */ +#define DA9052_TSIZMSB_TSIZM 0XFF + +/* RTC REGISTER */ +/* RTC TIMER SECONDS REGISTER BITS */ +#define DA9052_COUNTS_MONITOR 0X40 +#define DA9052_RTC_SEC 0X3F + +/* RTC TIMER MINUTES REGISTER BIT */ +#define DA9052_RTC_MIN 0X3F + +/* RTC TIMER HOUR REGISTER BIT */ +#define DA9052_RTC_HOUR 0X1F + +/* RTC TIMER DAYS REGISTER BIT */ +#define DA9052_RTC_DAY 0X1F + +/* RTC TIMER MONTHS REGISTER BIT */ +#define DA9052_RTC_MONTH 0X0F + +/* RTC TIMER YEARS REGISTER BIT */ +#define DA9052_RTC_YEAR 0X3F + +/* RTC ALARM MINUTES REGISTER BITS */ +#define DA9052_ALARMM_I_TICK_TYPE 0X80 +#define DA9052_ALARMMI_ALARMTYPE 0X40 + +/* RTC ALARM YEARS REGISTER BITS */ +#define DA9052_ALARM_Y_TICK_ON 0X80 +#define DA9052_ALARM_Y_ALARM_ON 0X40 + +/* RTC SECONDS REGISTER A BITS */ +#define DA9052_SECONDA_SECONDSA 0XFF + +/* RTC SECONDS REGISTER B BITS */ +#define DA9052_SECONDB_SECONDSB 0XFF + +/* RTC SECONDS REGISTER C BITS */ +#define DA9052_SECONDC_SECONDSC 0XFF + +/* RTC SECONDS REGISTER D BITS */ +#define DA9052_SECONDD_SECONDSD 0XFF + +#endif +/* __LINUX_MFD_DA9052_REG_H */ -- cgit v1.2.3 From 6261ddee70174372d6a75601f40719b7a5392f3f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 14 Dec 2011 11:19:07 -0800 Subject: kref: fix up the kfree build problems It turns out that some memory allocators use kobjects, which use krefs, and kref.h was wanting to figure out the address of kfree(), which ended up in a loop. kfree was only being needed for a warning to tell the caller that they were doing something stupid. Now we just move that warning into the comments for the functions, which results in a bit more fun as everyone enjoys digging for people to mock at times of boredom. So, remove the dependancy of slab.h on kref.h, and fix up the other include file as well (we really only need bug.h and atomic.h, not types.h). Reported-by: Stephen Rothwell Cc: Peter Zijlstra Cc: Alexey Dobriyan Cc: Eric Dumazet Cc: Ingo Molnar Cc: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index d66c88a3b48c..abc0120b09b7 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -15,8 +15,8 @@ #ifndef _KREF_H_ #define _KREF_H_ -#include -#include +#include +#include struct kref { atomic_t refcount; @@ -48,7 +48,10 @@ static inline void kref_get(struct kref *kref) * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree - * in as this function. + * in as this function. If the caller does pass kfree to this + * function, you will be publicly mocked mercilessly by the kref + * maintainer, and anyone else who happens to notice it. You have + * been warned. * * Subtract @count from the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this @@ -60,7 +63,6 @@ static inline int kref_sub(struct kref *kref, unsigned int count, void (*release)(struct kref *kref)) { WARN_ON(release == NULL); - WARN_ON(release == (void (*)(struct kref *))kfree); if (atomic_sub_and_test((int) count, &kref->refcount)) { release(kref); @@ -75,7 +77,10 @@ static inline int kref_sub(struct kref *kref, unsigned int count, * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree - * in as this function. + * in as this function. If the caller does pass kfree to this + * function, you will be publicly mocked mercilessly by the kref + * maintainer, and anyone else who happens to notice it. You have + * been warned. * * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this -- cgit v1.2.3 From 1ed28f610653e9b18433c6d87e9d333b7e3e886e Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 14 Dec 2011 16:43:09 +0100 Subject: NFC: Add a DEP link control netlink command NFC-DEP (Data Exchange Protocol) is an NFC MAC layer. This command allows to enable and disable the DEP link on to which e.g. LLCP can run. Signed-off-by: Samuel Ortiz Signed-off-by: John W. Linville --- include/linux/nfc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 36cb955b05cc..34d8303111f0 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -62,6 +62,8 @@ enum nfc_commands { NFC_CMD_GET_DEVICE, NFC_CMD_DEV_UP, NFC_CMD_DEV_DOWN, + NFC_CMD_DEP_LINK_UP, + NFC_CMD_DEP_LINK_DOWN, NFC_CMD_START_POLL, NFC_CMD_STOP_POLL, NFC_CMD_GET_TARGET, @@ -86,6 +88,8 @@ enum nfc_commands { * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the * target is not NFC-Forum compliant) + * @NFC_ATTR_COMM_MODE: Passive or active mode + * @NFC_ATTR_RF_MODE: Initiator or target */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -95,6 +99,8 @@ enum nfc_attrs { NFC_ATTR_TARGET_INDEX, NFC_ATTR_TARGET_SENS_RES, NFC_ATTR_TARGET_SEL_RES, + NFC_ATTR_COMM_MODE, + NFC_ATTR_RF_MODE, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -111,6 +117,14 @@ enum nfc_attrs { #define NFC_PROTO_MAX 6 +/* NFC communication modes */ +#define NFC_COMM_ACTIVE 0 +#define NFC_COMM_PASSIVE 1 + +/* NFC RF modes */ +#define NFC_RF_INITIATOR 0 +#define NFC_RF_TARGET 1 + /* NFC protocols masks used in bitsets */ #define NFC_PROTO_JEWEL_MASK (1 << NFC_PROTO_JEWEL) #define NFC_PROTO_MIFARE_MASK (1 << NFC_PROTO_MIFARE) -- cgit v1.2.3 From d646960f7986fefb460a2b062d5ccc8ccfeacc3a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 14 Dec 2011 16:43:12 +0100 Subject: NFC: Initial LLCP support This patch is an initial implementation for the NFC Logical Link Control protocol. It's also known as NFC peer to peer mode. This is a basic implementation as it lacks SDP (services Discovery Protocol), frames aggregation support, and frame rejecion parsing. Follow up patches will implement those missing features. This code has been tested against a Nexus S phone implementing LLCP 1.0. Signed-off-by: Samuel Ortiz Signed-off-by: John W. Linville --- include/linux/nfc.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 34d8303111f0..89fee4ab1904 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -139,9 +139,22 @@ struct sockaddr_nfc { __u32 nfc_protocol; }; +#define NFC_LLCP_MAX_SERVICE_NAME 63 +struct sockaddr_nfc_llcp { + sa_family_t sa_family; + __u32 dev_idx; + __u32 target_idx; + __u32 nfc_protocol; + __u8 dsap; /* Destination SAP, if known */ + __u8 ssap; /* Source SAP to be bound to */ + char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; + size_t service_name_len; +}; + /* NFC socket protocols */ #define NFC_SOCKPROTO_RAW 0 -#define NFC_SOCKPROTO_MAX 1 +#define NFC_SOCKPROTO_LLCP 1 +#define NFC_SOCKPROTO_MAX 2 #define NFC_HEADER_SIZE 1 -- cgit v1.2.3 From ca22e56debc57b47c422b749c93217ba62644be2 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 14 Dec 2011 14:29:38 -0800 Subject: driver-core: implement 'sysdev' functionality for regular devices and buses All sysdev classes and sysdev devices will converted to regular devices and buses to properly hook userspace into the event processing. There is no interesting difference between a 'sysdev' and 'device' which would justify to roll an entire own subsystem with different userspace export semantics. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are currently not properly available. Every converted sysdev class will create a regular device with the class name in /sys/devices/system and all registered devices will becom a children of theses devices. For compatibility reasons, the sysdev class-wide attributes are created at this parent device. (Do not copy that logic for anything new, subsystem- wide properties belong to the subsystem, not to some fake parent device created in /sys/devices.) Every sysdev driver is implemented as a simple subsystem interface now, and no longer called a driver. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 78 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 341fb740d851..7f9fc1505e94 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -53,6 +53,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * struct bus_type - The bus type of the device * * @name: The name of the bus. + * @dev_name: Used for subsystems to enumerate devices like ("foo%u", dev->id). + * @dev_root: Default device to use as the parent. * @bus_attrs: Default attributes of the bus. * @dev_attrs: Default attributes of the devices on the bus. * @drv_attrs: Default attributes of the device drivers on the bus. @@ -86,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); */ struct bus_type { const char *name; + const char *dev_name; + struct device *dev_root; struct bus_attribute *bus_attrs; struct device_attribute *dev_attrs; struct driver_attribute *drv_attrs; @@ -106,12 +110,30 @@ struct bus_type { struct subsys_private *p; }; -extern int __must_check bus_register(struct bus_type *bus); +/* This is a #define to keep the compiler from merging different + * instances of the __key variable */ +#define bus_register(subsys) \ +({ \ + static struct lock_class_key __key; \ + __bus_register(subsys, &__key); \ +}) +extern int __must_check __bus_register(struct bus_type *bus, + struct lock_class_key *key); extern void bus_unregister(struct bus_type *bus); extern int __must_check bus_rescan_devices(struct bus_type *bus); /* iterator helpers for buses */ +struct subsys_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; +void subsys_dev_iter_init(struct subsys_dev_iter *iter, + struct bus_type *subsys, + struct device *start, + const struct device_type *type); +struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); +void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); @@ -121,10 +143,10 @@ struct device *bus_find_device(struct bus_type *bus, struct device *start, struct device *bus_find_device_by_name(struct bus_type *bus, struct device *start, const char *name); - +struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id, + struct device *hint); int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); - void bus_sort_breadthfirst(struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)); @@ -255,6 +277,33 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); +/** + * struct subsys_interface - interfaces to device functions + * @name name of the device function + * @subsystem subsytem of the devices to attach to + * @node the list of functions registered at the subsystem + * @add device hookup to device function handler + * @remove device hookup to device function handler + * + * Simple interfaces attached to a subsystem. Multiple interfaces can + * attach to a subsystem and its devices. Unlike drivers, they do not + * exclusively claim or control devices. Interfaces usually represent + * a specific functionality of a subsystem/class of devices. + */ +struct subsys_interface { + const char *name; + struct bus_type *subsys; + struct list_head node; + int (*add_dev)(struct device *dev, struct subsys_interface *sif); + int (*remove_dev)(struct device *dev, struct subsys_interface *sif); +}; + +int subsys_interface_register(struct subsys_interface *sif); +void subsys_interface_unregister(struct subsys_interface *sif); + +int subsys_system_register(struct bus_type *subsys, + const struct attribute_group **groups); + /** * struct class - device classes * @name: Name of the class. @@ -438,8 +487,28 @@ struct device_attribute { const char *buf, size_t count); }; +struct dev_ext_attribute { + struct device_attribute attr; + void *var; +}; + +ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); +ssize_t device_show_int(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t device_store_int(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + #define DEVICE_ATTR(_name, _mode, _show, _store) \ -struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) + struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define DEVICE_ULONG_ATTR(_name, _mode, _var) \ + struct dev_ext_attribute dev_attr_##_name = \ + { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } +#define DEVICE_INT_ATTR(_name, _mode, _var) \ + struct dev_ext_attribute dev_attr_##_name = \ + { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } extern int __must_check device_create_file(struct device *device, const struct device_attribute *entry); @@ -603,6 +672,7 @@ struct device { struct device_node *of_node; /* associated device tree node */ dev_t devt; /* dev_t, creates the sysfs "dev" */ + u32 id; /* device instance */ spinlock_t devres_lock; struct list_head devres_head; -- cgit v1.2.3 From fe5ff8b84c8b03348a2f64ea9d884348faec2217 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 14 Dec 2011 15:21:07 -0800 Subject: edac: convert sysdev_class to a regular subsystem After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Cc: Doug Thompson Cc: Paul Gortmaker Cc: Lucas De Marchi Cc: Borislav Petkov Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/edac.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 055b248bdd53..1cd3947987e5 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -13,7 +13,7 @@ #define _LINUX_EDAC_H_ #include -#include +#include #define EDAC_OPSTATE_INVAL -1 #define EDAC_OPSTATE_POLL 0 @@ -23,12 +23,12 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; -extern struct sysdev_class edac_class; +extern struct bus_type edac_subsys; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); -extern struct sysdev_class *edac_get_sysfs_class(void); -extern void edac_put_sysfs_class(void); +extern struct bus_type *edac_get_sysfs_subsys(void); +extern void edac_put_sysfs_subsys(void); static inline void opstate_init(void) { -- cgit v1.2.3 From 3c8bedb7e42dacc141b1c42b01d9c309dc4ac462 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 15 Dec 2011 14:52:37 +0800 Subject: mfd: Declare da9052_regmap_config for the bus drivers Fixes build failures. Reported-by: Stephen Rothwell Signed-off-by: Mark Brown --- include/linux/mfd/da9052/da9052.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index c8899ab20549..5702d1be13b4 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -126,4 +126,6 @@ static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg, int da9052_device_init(struct da9052 *da9052, u8 chip_id); void da9052_device_exit(struct da9052 *da9052); +extern struct regmap_config da9052_regmap_config; + #endif /* __MFD_DA9052_DA9052_H */ -- cgit v1.2.3 From abd63bc3a0f65ae9d85bc3b1bb067d3e3c2b2cc2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 14 Dec 2011 14:39:26 -0800 Subject: sched: Mark parent and real_parent as __rcu The parent and real_parent pointers should be considered __rcu, since they should be held under either tasklist_lock or rcu_read_lock. Signed-off-by: Kees Cook Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Al Viro Link: http://lkml.kernel.org/r/20111214223925.GA27578@www.outflux.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cc8c6206657f..5ef09012a629 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,8 +1330,8 @@ struct task_struct { * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process */ - struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ + struct task_struct __rcu *real_parent; /* real parent process */ + struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ /* * children/sibling forms the list of my natural children */ -- cgit v1.2.3 From 648616343cdbe904c585a6c12e323d3b3c72e46f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 15 Dec 2011 14:56:09 +0100 Subject: [S390] cputime: add sparse checking and cleanup Make cputime_t and cputime64_t nocast to enable sparse checking to detect incorrect use of cputime. Drop the cputime macros for simple scalar operations. The conversion macros are still needed. Signed-off-by: Martin Schwidefsky --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9b9bc5..5649032d73fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -483,8 +483,8 @@ struct task_cputime { #define INIT_CPUTIME \ (struct task_cputime) { \ - .utime = cputime_zero, \ - .stime = cputime_zero, \ + .utime = 0, \ + .stime = 0, \ .sum_exec_runtime = 0, \ } -- cgit v1.2.3 From 888bdaa9b2c426dcca214e6efd388080938082cb Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 14 Dec 2011 23:34:31 +0000 Subject: Move limit definitions outside CONFIG_INET They need to be available for other protocols as well, since they are used in sock.c openly Signed-off-by: Glauber Costa CC: Hiroyouki Kamezawa CC: David S. Miller CC: Eric Dumazet CC: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1513994ce207..9b296ea41bb8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -384,13 +384,13 @@ mem_cgroup_print_bad_page(struct page *page) } #endif -#ifdef CONFIG_INET enum { UNDER_LIMIT, SOFT_LIMIT, OVER_LIMIT, }; +#ifdef CONFIG_INET struct sock; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM void sock_update_memcg(struct sock *sk); -- cgit v1.2.3 From bdd90d5e36a55271beb957b3d7ca3e29b2a90207 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Dec 2011 12:20:27 +0100 Subject: cfg80211: validate nl80211 station handling better The nl80211 station handling code is a bit messy and doesn't do a lot of validation. It seems like this could be an issue for drivers that don't use mac80211 to validate everything. As cfg80211 doesn't keep station state, move the validation of allowing supported_rates to change for TDLS only in station mode to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index a18760684fc9..f795cb7dccdd 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1536,7 +1536,11 @@ enum nl80211_iftype { * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated - * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer + * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should + * only be used in managed mode (even in the flags mask). Note that the + * flag can't be changed, it is only valid while adding a station, and + * attempts to change it will silently be ignored (rather than rejected + * as errors.) * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ -- cgit v1.2.3 From 8bc1f85c02a20a59956b00b3acea12c04dce9ae8 Mon Sep 17 00:00:00 2001 From: Eugeni Dodonov Date: Wed, 23 Nov 2011 16:42:14 -0200 Subject: iommu: Export intel_iommu_enabled to signal when iommu is in use In i915 driver, we do not enable either rc6 or semaphores on SNB when dmar is enabled. The new 'intel_iommu_enabled' variable signals when the iommu code is in operation. Cc: Ted Phelps Cc: Peter Cc: Lukas Hejtmanek Cc: Andrew Lutomirski CC: Daniel Vetter Cc: Eugeni Dodonov Signed-off-by: Keith Packard --- include/linux/dma_remapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index ef90cbd8e173..57c9a8ae4f2d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -31,6 +31,7 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; +extern int intel_iommu_enabled; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { @@ -44,6 +45,7 @@ static inline void free_dmar_iommu(struct intel_iommu *iommu) { } #define dmar_disabled (1) +#define intel_iommu_enabled (0) #endif -- cgit v1.2.3 From e7c466e58eb1ff9bf49c2f3902622dc11a8c7022 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:42:42 +0000 Subject: sock_diag: Move the SOCK_DIAG_BY_FAMILY cmd declaration It should belong to sock_diag, not inet_diag. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 - include/linux/sock_diag.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 78972a149dff..a27e62178d43 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -6,7 +6,6 @@ /* Just some random number */ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 -#define SOCK_DIAG_BY_FAMILY 20 #define INET_DIAG_GETSOCK_MAX 24 diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index ba4933b1213b..7999778ad08d 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,5 +1,8 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ + +#define SOCK_DIAG_BY_FAMILY 20 + struct sk_buff; struct nlmsghdr; -- cgit v1.2.3 From f65c1b534b99aef1809b893387b295963821549f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:43:44 +0000 Subject: sock_diag: Generalize requests cookies managements The sk address is used as a cookie between dump/get_exact calls. It will be required for unix socket sdumping, so move it from inet_diag to sock_diag. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 - include/linux/sock_diag.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a27e62178d43..afa5d5c74169 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -168,7 +168,6 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct inet_diag_req *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req); extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 7999778ad08d..379d5dccf8e1 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -22,5 +22,8 @@ void sock_diag_unregister(struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +int sock_diag_check_cookie(void *sk, __u32 *cookie); +void sock_diag_save_cookie(void *sk, __u32 *cookie); + extern struct sock *sock_diag_nlsk; #endif -- cgit v1.2.3 From 22931d3b906cd0a1726a49a09713f9220a5fab8a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:44:35 +0000 Subject: unix_diag: Basic module skeleton Includes basic module_init/_exit functionality, dump/get_exact stubs and declares the basic API structures for request and response. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/unix_diag.h (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h new file mode 100644 index 000000000000..445184a85763 --- /dev/null +++ b/include/linux/unix_diag.h @@ -0,0 +1,24 @@ +#ifndef __UNIX_DIAG_H__ +#define __UNIX_DIAG_H__ + +struct unix_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 udiag_states; + __u32 udiag_ino; + __u32 udiag_show; + __u32 udiag_cookie[2]; +}; + +struct unix_diag_msg { + __u8 udiag_family; + __u8 udiag_type; + __u8 udiag_state; + __u8 pad; + + __u32 udiag_ino; + __u32 udiag_cookie[2]; +}; + +#endif -- cgit v1.2.3 From f5248b48a64c221dd6157ab9cbee5a36ee45e6ed Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:45:24 +0000 Subject: unix_diag: Unix socket name NLA Report the sun_path when requested as NLA. With leading '\0' if present but without the leading AF_UNIX bits. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 445184a85763..cc4df34d4c14 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -11,6 +11,8 @@ struct unix_diag_req { __u32 udiag_cookie[2]; }; +#define UDIAG_SHOW_NAME 0x00000001 /* show name (not path) */ + struct unix_diag_msg { __u8 udiag_family; __u8 udiag_type; @@ -21,4 +23,10 @@ struct unix_diag_msg { __u32 udiag_cookie[2]; }; +enum { + UNIX_DIAG_NAME, + + UNIX_DIAG_MAX, +}; + #endif -- cgit v1.2.3 From 5f7b0569460b7d8d01ca776430a00505a68b7584 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:45:43 +0000 Subject: unix_diag: Unix inode info NLA Actually, the socket path if it's not anonymous doesn't give a clue to which file the socket is bound to. Even if the path is absolute, it can be unlinked and then new socket can be bound to it. With this NLA it's possible to check which file a particular socket is really bound to. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index cc4df34d4c14..3e53adbe9c7f 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -12,6 +12,7 @@ struct unix_diag_req { }; #define UDIAG_SHOW_NAME 0x00000001 /* show name (not path) */ +#define UDIAG_SHOW_VFS 0x00000002 /* show VFS inode info */ struct unix_diag_msg { __u8 udiag_family; @@ -25,8 +26,14 @@ struct unix_diag_msg { enum { UNIX_DIAG_NAME, + UNIX_DIAG_VFS, UNIX_DIAG_MAX, }; +struct unix_diag_vfs { + __u32 udiag_vfs_ino; + __u32 udiag_vfs_dev; +}; + #endif -- cgit v1.2.3 From ac02be8d96af9f66a4de86781ee9facc2dff99d4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:45:58 +0000 Subject: unix_diag: Unix peer inode NLA Report the peer socket inode ID as NLA. With this it's finally possible to find out the other end of an interesting unix connection. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 3e53adbe9c7f..2d74a86024ac 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -13,6 +13,7 @@ struct unix_diag_req { #define UDIAG_SHOW_NAME 0x00000001 /* show name (not path) */ #define UDIAG_SHOW_VFS 0x00000002 /* show VFS inode info */ +#define UDIAG_SHOW_PEER 0x00000004 /* show peer socket info */ struct unix_diag_msg { __u8 udiag_family; @@ -27,6 +28,7 @@ struct unix_diag_msg { enum { UNIX_DIAG_NAME, UNIX_DIAG_VFS, + UNIX_DIAG_PEER, UNIX_DIAG_MAX, }; -- cgit v1.2.3 From 2aac7a2cb0d9d8c65fc7dde3e19e46b3e878d23d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:46:14 +0000 Subject: unix_diag: Pending connections IDs NLA When establishing a unix connection on stream sockets the server end receives an skb with socket in its receive queue. Report who is waiting for these ends to be accepted for listening sockets via NLA. There's a lokcing issue with this -- the unix sk state lock is required to access the peer, and it is taken under the listening sk's queue lock. Strictly speaking the queue lock should be taken inside the state lock, but since in this case these two sockets are different it shouldn't lead to deadlock. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 2d74a86024ac..03ffb7de15b6 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -14,6 +14,7 @@ struct unix_diag_req { #define UDIAG_SHOW_NAME 0x00000001 /* show name (not path) */ #define UDIAG_SHOW_VFS 0x00000002 /* show VFS inode info */ #define UDIAG_SHOW_PEER 0x00000004 /* show peer socket info */ +#define UDIAG_SHOW_ICONS 0x00000008 /* show pending connections */ struct unix_diag_msg { __u8 udiag_family; @@ -29,6 +30,7 @@ enum { UNIX_DIAG_NAME, UNIX_DIAG_VFS, UNIX_DIAG_PEER, + UNIX_DIAG_ICONS, UNIX_DIAG_MAX, }; -- cgit v1.2.3 From cbf391958afb9b82c72324a15891eb3102200085 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:46:31 +0000 Subject: unix_diag: Receive queue lenght NLA Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 03ffb7de15b6..3f7afb007d70 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -15,6 +15,7 @@ struct unix_diag_req { #define UDIAG_SHOW_VFS 0x00000002 /* show VFS inode info */ #define UDIAG_SHOW_PEER 0x00000004 /* show peer socket info */ #define UDIAG_SHOW_ICONS 0x00000008 /* show pending connections */ +#define UDIAG_SHOW_RQLEN 0x00000010 /* show skb receive queue len */ struct unix_diag_msg { __u8 udiag_family; @@ -31,6 +32,7 @@ enum { UNIX_DIAG_VFS, UNIX_DIAG_PEER, UNIX_DIAG_ICONS, + UNIX_DIAG_RQLEN, UNIX_DIAG_MAX, }; -- cgit v1.2.3 From 14596f7006297b67516e2b6a2b26bcb11fe08fb3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 Dec 2011 13:51:16 +0000 Subject: ethtool: Clarify use of size field for ETHTOOL_GRXFHINDIR In order to find out the device's RX flow hash table size, ethtool initially uses ETHTOOL_GRXFHINDIR with a buffer size of zero. This must be supported, but it is not necessary to support any other user buffer size less than the device table size. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 20db5b275c3f..0ec2fd412d03 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -543,8 +543,9 @@ struct compat_ethtool_rxnfc { /** * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR - * @size: On entry, the array size of the user buffer. On return from - * %ETHTOOL_GRXFHINDIR, the array size of the hardware indirection table. + * @size: On entry, the array size of the user buffer, which may be zero + * for %ETHTOOL_GRXFHINDIR. On return from %ETHTOOL_GRXFHINDIR, the + * array size of the hardware indirection table. * @ring_index: RX ring/queue index for each hash value */ struct ethtool_rxfh_indir { -- cgit v1.2.3 From 7850f63f1620512631445b901ae11cd149e7375c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 Dec 2011 13:55:01 +0000 Subject: ethtool: Centralise validation of ETHTOOL_{G, S}RXFHINDIR parameters Add a new ethtool operation (get_rxfh_indir_size) to get the indirectional table size. Use this to validate the user buffer size before calling get_rxfh_indir or set_rxfh_indir. Use get_rxnfc to get the number of RX rings, and validate the contents of the new indirection table before calling set_rxfh_indir. Remove this validation from drivers. Signed-off-by: Ben Hutchings Acked-by: Dimitris Michailidis Signed-off-by: David S. Miller --- include/linux/ethtool.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0ec2fd412d03..3b9f09d55b5c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -828,9 +828,13 @@ u32 ethtool_op_get_link(struct net_device *dev); * error code or zero. * @set_rx_ntuple: Set an RX n-tuple rule. Returns a negative error code * or zero. + * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. + * Returns zero if not supported for this specific device. * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. + * Will not be called if @get_rxfh_indir_size returns zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. + * Will not be called if @get_rxfh_indir_size returns zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -894,10 +898,9 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); int (*set_rx_ntuple)(struct net_device *, struct ethtool_rx_ntuple *); - int (*get_rxfh_indir)(struct net_device *, - struct ethtool_rxfh_indir *); - int (*set_rxfh_indir)(struct net_device *, - const struct ethtool_rxfh_indir *); + u32 (*get_rxfh_indir_size)(struct net_device *); + int (*get_rxfh_indir)(struct net_device *, u32 *); + int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); -- cgit v1.2.3 From 278bc4296bd64ffd1d3913b487dc8a520e423a7a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 Dec 2011 13:56:49 +0000 Subject: ethtool: Define and apply a default policy for RX flow hash indirection All drivers that support modification of the RX flow hash indirection table initialise it in the same way: RX rings are assigned to table entries in rotation. Make that default policy explicit by having them call a ethtool_rxfh_indir_default() function. In the ethtool core, add support for a zero size value for ETHTOOL_SRXFHINDIR, which resets the table to this default. Partly-suggested-by: Matt Carlson Signed-off-by: Ben Hutchings Acked-by: Shreyas N Bhatewara Signed-off-by: David S. Miller --- include/linux/ethtool.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3b9f09d55b5c..b38bf69310ee 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -543,10 +543,15 @@ struct compat_ethtool_rxnfc { /** * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR - * @size: On entry, the array size of the user buffer, which may be zero - * for %ETHTOOL_GRXFHINDIR. On return from %ETHTOOL_GRXFHINDIR, the - * array size of the hardware indirection table. + * @size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware + * indirection table. * @ring_index: RX ring/queue index for each hash value + * + * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size + * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means + * the table should be reset to default values. This last feature + * is not supported by the original implementations. */ struct ethtool_rxfh_indir { __u32 cmd; @@ -749,6 +754,18 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); +/** + * ethtool_rxfh_indir_default - get default value for RX flow hash indirection + * @index: Index in RX flow hash indirection table + * @n_rx_rings: Number of RX rings to use + * + * This function provides the default policy for RX flow hash indirection. + */ +static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) +{ + return index % n_rx_rings; +} + /** * struct ethtool_ops - optional netdev operations * @get_settings: Get various device settings including Ethernet link -- cgit v1.2.3 From b1b73d095084e754562961c443aa8f6587a55f8e Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Mon, 19 Dec 2011 18:13:19 +0900 Subject: time/clocksource: Fix kernel-doc warnings Fix various KernelDoc build warnings. Signed-off-by: Kusanagi Kouichi Cc: John Stultz Link: http://lkml.kernel.org/r/20111219091320.0D5AF6FC03D@msa105.auone-net.jp Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c86c940d1de3..081147da0564 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -71,7 +71,7 @@ struct timecounter { /** * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds - * @tc: Pointer to cycle counter. + * @cc: Pointer to cycle counter. * @cycles: Cycles * * XXX - This could use some mult_lxl_ll() asm optimization. Same code @@ -114,7 +114,7 @@ extern u64 timecounter_read(struct timecounter *tc); * time base as values returned by * timecounter_read() * @tc: Pointer to time counter. - * @cycle: a value returned by tc->cc->read() + * @cycle_tstamp: a value returned by tc->cc->read() * * Cycle counts that are converted correctly as long as they * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], @@ -156,11 +156,12 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) - * @maxadj maximum adjustment value to mult (~11%) + * @maxadj: maximum adjustment value to mult (~11%) * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @cycle_last: most recent cycle counter value seen by ::read() */ struct clocksource { /* @@ -187,6 +188,7 @@ struct clocksource { void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; @@ -261,6 +263,9 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) /** * clocksource_cyc2ns - converts clocksource cycles to nanoseconds + * @cycles: cycles + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) * * Converts cycles to nanoseconds, using the given mult and shift. * -- cgit v1.2.3 From a85e1d55974646a442d95911e3f7d7a891ea9ac5 Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Fri, 9 Dec 2011 11:01:49 -0800 Subject: cfg80211: Return beacon loss count in station If station info contains a beacon loss count, return it to userspace. Signed-off-by: Paul Stewart Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f795cb7dccdd..0f5ff3739820 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1655,6 +1655,7 @@ enum nl80211_sta_bss_param { * containing info as possible, see &enum nl80211_sta_bss_param * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. + * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1677,6 +1678,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_BSS_PARAM, NL80211_STA_INFO_CONNECTED_TIME, NL80211_STA_INFO_STA_FLAGS, + NL80211_STA_INFO_BEACON_LOSS, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 58a60168d12c4e5be21c29420a3de4a41ef3470f Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Mon, 19 Dec 2011 04:00:26 +0000 Subject: mlx4: capability for link sensing For ConnectX3 devices, we allow link sensing only if FW explicitly reported it supports the feature. For older versions (ConnectX1 and 2), if the card supports both link layer types (Ethenet and Infiniband), link sensing is supported. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5f784ff6a36e..b06a44ba1565 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -94,7 +94,8 @@ enum { MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40, MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, - MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48 + MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55 }; #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) -- cgit v1.2.3 From 8d0fc7b61191c9433a4f738987b89e1d962eb637 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Mon, 19 Dec 2011 04:00:34 +0000 Subject: mlx4_core: Changing link sensing logic New FW can give clues to driver regarding default port type and whether or not we should default to link sensing on the port. 2 bits are added to QUERY_PORT command: 1. suggested_type: This bit gives a hint whether the default port type should be IB or Ethernet. The driver will use this hint in case the user didn't specify explicitly the link layer type he wants to set. 2. default_sense: If this bit is set, we would sense the port type on start-up and default the port to link sensing Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b06a44ba1565..5c4fe8e5bfe5 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -303,6 +303,8 @@ struct mlx4_caps { int log_num_prios; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; + u8 suggested_type[MLX4_MAX_PORTS + 1]; + u8 default_sense[MLX4_MAX_PORTS + 1]; u32 port_mask[MLX4_MAX_PORTS + 1]; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; -- cgit v1.2.3 From 447f219190bf0368b8b36cf60155744cb43510df Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 19 Dec 2011 15:04:41 -0500 Subject: Revert "net: Remove unused neighbour layer ops." This reverts commit 5c3ddec73d01a1fae9409c197078cb02c42238c3. S390 qeth driver actually still uses the setup ops. Reported-by: Frank Blaschka Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6b9d4edb7c26..603730804da5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,6 +974,7 @@ struct net_device_ops { int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); int (*ndo_neigh_construct)(struct neighbour *n); + void (*ndo_neigh_destroy)(struct neighbour *n); }; /* -- cgit v1.2.3 From ab56222a32b9dbaae19c1d37f07b0ac4fc3c27ec Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Tue, 20 Dec 2011 13:23:24 +0000 Subject: tcp: Replace constants with #define macros to record the state of SACK/FACK and DSACK for better readability and maintenance. Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7f59ee946983..46a85c9e1f25 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -238,6 +238,11 @@ struct tcp_sack_block { u32 end_seq; }; +/*These are used to set the sack_ok field in struct tcp_options_received */ +#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ +#define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ +#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ + struct tcp_options_received { /* PAWS/RTTM data */ long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ -- cgit v1.2.3 From f07fdec50a13f134ea9608c8fb3f6408c58ef55e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Dec 2011 13:20:54 +0100 Subject: lockdep/waitqueues: Add better annotation -> #2 (&tty->write_wait){-.-...}: is a lot more informative than: -> #2 (key#19){-.....}: Signed-off-by: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-8zpopbny51023rdb0qq67eye@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 3efc9f3f43a0..a9ce45e8501c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -77,13 +77,13 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); +extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); #define init_waitqueue_head(q) \ do { \ static struct lock_class_key __key; \ \ - __init_waitqueue_head((q), &__key); \ + __init_waitqueue_head((q), #q, &__key); \ } while (0) #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From c37e17497e01fc0f5d2d6feb5723b210b3ab8890 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sun, 11 Dec 2011 00:28:52 +0100 Subject: perf events: Add PERF_COUNT_HW_REF_CPU_CYCLES generic PMU event This event counts the number of reference core cpu cycles. Reference means that the event increments at a constant rate which is not subject to core CPU frequency adjustments. The event may not count when the processor is in halted (low power) state. As such, it may not be equivalent to wall clock time. However, when the processor is not halted state, the event keeps a constant correlation with wall clock time. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1323559734-3488-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 564769cdb473..08855613ceb3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ enum perf_hw_id { PERF_COUNT_HW_BUS_CYCLES = 6, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 9b39e73d0c2b265a7f8748b0e9a9f09be84079a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:24 +0100 Subject: PM / Sleep: Remove forward-only callbacks from platform bus type The forward-only PM callbacks provided by the platform bus type are not necessary any more, because the PM core executes driver callbacks when the corresponding subsystem callbacks are not present, so drop them. Signed-off-by: Rafael J. Wysocki --- include/linux/platform_device.h | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 2a23f7d1a825..b5267c951161 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -264,62 +264,34 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ -#ifdef CONFIG_PM_SLEEP -extern int platform_pm_prepare(struct device *dev); -extern void platform_pm_complete(struct device *dev); -#else -#define platform_pm_prepare NULL -#define platform_pm_complete NULL -#endif - #ifdef CONFIG_SUSPEND extern int platform_pm_suspend(struct device *dev); -extern int platform_pm_suspend_noirq(struct device *dev); extern int platform_pm_resume(struct device *dev); -extern int platform_pm_resume_noirq(struct device *dev); #else #define platform_pm_suspend NULL #define platform_pm_resume NULL -#define platform_pm_suspend_noirq NULL -#define platform_pm_resume_noirq NULL #endif #ifdef CONFIG_HIBERNATE_CALLBACKS extern int platform_pm_freeze(struct device *dev); -extern int platform_pm_freeze_noirq(struct device *dev); extern int platform_pm_thaw(struct device *dev); -extern int platform_pm_thaw_noirq(struct device *dev); extern int platform_pm_poweroff(struct device *dev); -extern int platform_pm_poweroff_noirq(struct device *dev); extern int platform_pm_restore(struct device *dev); -extern int platform_pm_restore_noirq(struct device *dev); #else #define platform_pm_freeze NULL #define platform_pm_thaw NULL #define platform_pm_poweroff NULL #define platform_pm_restore NULL -#define platform_pm_freeze_noirq NULL -#define platform_pm_thaw_noirq NULL -#define platform_pm_poweroff_noirq NULL -#define platform_pm_restore_noirq NULL #endif #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ - .prepare = platform_pm_prepare, \ - .complete = platform_pm_complete, \ .suspend = platform_pm_suspend, \ .resume = platform_pm_resume, \ .freeze = platform_pm_freeze, \ .thaw = platform_pm_thaw, \ .poweroff = platform_pm_poweroff, \ - .restore = platform_pm_restore, \ - .suspend_noirq = platform_pm_suspend_noirq, \ - .resume_noirq = platform_pm_resume_noirq, \ - .freeze_noirq = platform_pm_freeze_noirq, \ - .thaw_noirq = platform_pm_thaw_noirq, \ - .poweroff_noirq = platform_pm_poweroff_noirq, \ - .restore_noirq = platform_pm_restore_noirq, + .restore = platform_pm_restore, #else #define USE_PLATFORM_PM_SLEEP_OPS #endif -- cgit v1.2.3 From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:42 +0100 Subject: PM: Drop generic_subsys_pm_ops Since the PM core is now going to execute driver callbacks directly if the corresponding subsystem callbacks are not present, forward-only subsystem callbacks (i.e. such that only execute the corresponding driver callbacks) are not necessary any more. Thus it is possible to remove generic_subsys_pm_ops, because the only callback in there that is not forward-only, .runtime_idle, is not really used by the only user of generic_subsys_pm_ops, which is vio_bus_type. However, the generic callback routines themselves cannot be removed from generic_ops.c, because they are used individually by a number of subsystems. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83a9aa5..21e04dd72a84 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,19 +300,6 @@ const struct dev_pm_ops name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -/* - * Use this for subsystems (bus types, device types, device classes) that don't - * need any special suspend/resume handling in addition to invoking the PM - * callbacks provided by device drivers supporting both the system sleep PM and - * runtime PM, make the pm member point to generic_subsys_pm_ops. - */ -#ifdef CONFIG_PM -extern struct dev_pm_ops generic_subsys_pm_ops; -#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) -#else -#define GENERIC_SUBSYS_PM_OPS NULL -#endif - /** * PM_EVENT_ messages * -- cgit v1.2.3 From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:29:42 -0800 Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion. Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Tigran Aivazian Cc: Len Brown Cc: Zhang Rui Cc: Dave Jones Cc: Peter Zijlstra Cc: Russell King Cc: Andrew Morton Cc: Arjan van de Ven Cc: "Rafael J. Wysocki" Cc: "Srivatsa S. Bhat" Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6cb60fd2ea84..fc3da0d70d68 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -14,7 +14,7 @@ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ -#include +#include #include #include #include @@ -22,19 +22,19 @@ struct cpu { int node_id; /* The node which contains the CPU */ int hotpluggable; /* creates sysfs control file if hotpluggable */ - struct sys_device sysdev; + struct device dev; }; extern int register_cpu(struct cpu *cpu, int num); -extern struct sys_device *get_cpu_sysdev(unsigned cpu); +extern struct device *get_cpu_device(unsigned cpu); -extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); -extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); +extern int cpu_add_dev_attr(struct device_attribute *attr); +extern void cpu_remove_dev_attr(struct device_attribute *attr); -extern int cpu_add_sysdev_attr_group(struct attribute_group *attrs); -extern void cpu_remove_sysdev_attr_group(struct attribute_group *attrs); +extern int cpu_add_dev_attr_group(struct attribute_group *attrs); +extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); -extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); +extern int sched_create_sysfs_power_savings_entries(struct device *dev); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); @@ -160,7 +160,7 @@ static inline void cpu_maps_update_done(void) } #endif /* CONFIG_SMP */ -extern struct sysdev_class cpu_sysdev_class; +extern struct bus_type cpu_subsys; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ -- cgit v1.2.3 From 10fbcf4c6cb122005cdf36fc24d7683da92c7a27 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:48:43 -0800 Subject: convert 'memory' sysdev_class to a regular subsystem This moves the 'memory sysdev_class' over to a regular 'memory' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/memory.h | 3 +-- include/linux/node.h | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 935699b30b7c..1ac7f6e405f9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -15,7 +15,6 @@ #ifndef _LINUX_MEMORY_H_ #define _LINUX_MEMORY_H_ -#include #include #include #include @@ -38,7 +37,7 @@ struct memory_block { int phys_device; /* to which fru does this belong? */ void *hw; /* optional pointer to fw/hw data */ int (*phys_callback)(struct memory_block *); - struct sys_device sysdev; + struct device dev; }; int arch_get_memory_phys_device(unsigned long start_pfn); diff --git a/include/linux/node.h b/include/linux/node.h index 92370e22343c..624e53cecc02 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -14,12 +14,12 @@ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ -#include +#include #include #include struct node { - struct sys_device sysdev; + struct device dev; #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; @@ -80,6 +80,6 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, } #endif -#define to_node(sys_device) container_of(sys_device, struct node, sysdev) +#define to_node(device) container_of(device, struct node, dev) #endif /* _LINUX_NODE_H_ */ -- cgit v1.2.3 From b3e8d7b2478401b2f25f4566a90faad54f7d6d07 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 15:13:54 -0800 Subject: kobject: remove kset_find_obj_hinted() Now that there are no in-kernel users of this function, remove it as it is no longer needed. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ad81e1c51487..fc615a97e2d3 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -191,8 +191,6 @@ static inline struct kobj_type *get_ktype(struct kobject *kobj) } extern struct kobject *kset_find_obj(struct kset *, const char *); -extern struct kobject *kset_find_obj_hinted(struct kset *, const char *, - struct kobject *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; -- cgit v1.2.3 From e30e2fdfe56288576ee9e04dbb06b4bd5f282203 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 22 Dec 2011 02:45:29 +0530 Subject: VFS: Fix race between CPU hotplug and lglocks Currently, the *_global_[un]lock_online() routines are not at all synchronized with CPU hotplug. Soft-lockups detected as a consequence of this race was reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng for finding out that the root-cause of this issue is the race condition between br_write_[un]lock() and CPU hotplug, which results in the lock states getting messed up). Fixing this race by just adding {get,put}_online_cpus() at appropriate places in *_global_[un]lock_online() is not a good option, because, then suddenly br_write_[un]lock() would become blocking, whereas they have been kept as non-blocking all this time, and we would want to keep them that way. So, overall, we want to ensure 3 things: 1. br_write_lock() and br_write_unlock() must remain as non-blocking. 2. The corresponding lock and unlock of the per-cpu spinlocks must not happen for different sets of CPUs. 3. Either prevent any new CPU online operation in between this lock-unlock, or ensure that the newly onlined CPU does not proceed with its corresponding per-cpu spinlock unlocked. To achieve all this: (a) We introduce a new spinlock that is taken by the *_global_lock_online() routine and released by the *_global_unlock_online() routine. (b) We register a callback for CPU hotplug notifications, and this callback takes the same spinlock as above. (c) We maintain a bitmap which is close to the cpu_online_mask, and once it is initialized in the lock_init() code, all future updates to it are done in the callback, under the above spinlock. (d) The above bitmap is used (instead of cpu_online_mask) while locking and unlocking the per-cpu locks. The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the br_write_lock-unlock sequence is in progress, the callback keeps spinning, thus preventing the CPU online operation till the lock-unlock sequence is complete. This takes care of requirement (3). The bitmap that we maintain remains unmodified throughout the lock-unlock sequence, since all updates to it are managed by the callback, which takes the same spinlock as the one taken by the lock code and released only by the unlock routine. Combining this with (d) above, satisfies requirement (2). Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug operations from racing with br_write_lock-unlock, requirement (1) is also taken care of. By the way, it is to be noted that a CPU offline operation can actually run in parallel with our lock-unlock sequence, because our callback doesn't react to notifications earlier than CPU_DEAD (in order to maintain our bitmap properly). And this means, since we use our own bitmap (which is stale, on purpose) during the lock-unlock sequence, we could end up unlocking the per-cpu lock of an offline CPU (because we had locked it earlier, when the CPU was online), in order to satisfy requirement (2). But this is harmless, though it looks a bit awkward. Debugged-by: Cong Meng Signed-off-by: Srivatsa S. Bhat Signed-off-by: Al Viro Cc: stable@vger.kernel.org --- include/linux/lglock.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index f549056fb20b..87f402ccec55 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -22,6 +22,7 @@ #include #include #include +#include /* can make br locks by using local lock for read side, global lock for write */ #define br_lock_init(name) name##_lock_init() @@ -72,9 +73,31 @@ #define DEFINE_LGLOCK(name) \ \ + DEFINE_SPINLOCK(name##_cpu_lock); \ + cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ + static int \ + name##_lg_cpu_callback(struct notifier_block *nb, \ + unsigned long action, void *hcpu) \ + { \ + switch (action & ~CPU_TASKS_FROZEN) { \ + case CPU_UP_PREPARE: \ + spin_lock(&name##_cpu_lock); \ + cpu_set((unsigned long)hcpu, name##_cpus); \ + spin_unlock(&name##_cpu_lock); \ + break; \ + case CPU_UP_CANCELED: case CPU_DEAD: \ + spin_lock(&name##_cpu_lock); \ + cpu_clear((unsigned long)hcpu, name##_cpus); \ + spin_unlock(&name##_cpu_lock); \ + } \ + return NOTIFY_OK; \ + } \ + static struct notifier_block name##_lg_cpu_notifier = { \ + .notifier_call = name##_lg_cpu_callback, \ + }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -83,6 +106,11 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ + register_hotcpu_notifier(&name##_lg_cpu_notifier); \ + get_online_cpus(); \ + for_each_online_cpu(i) \ + cpu_set(i, name##_cpus); \ + put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -124,9 +152,9 @@ \ void name##_global_lock_online(void) { \ int i; \ - preempt_disable(); \ + spin_lock(&name##_cpu_lock); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_cpu(i, &name##_cpus) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -137,12 +165,12 @@ void name##_global_unlock_online(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_cpu(i, &name##_cpus) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ } \ - preempt_enable(); \ + spin_unlock(&name##_cpu_lock); \ } \ EXPORT_SYMBOL(name##_global_unlock_online); \ \ -- cgit v1.2.3 From 38059ec2bd2ce9e4709f49f34795aa0944287908 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 23 Dec 2011 10:17:51 +1100 Subject: md: Fix userspace free_pages() macro While using etags to find free_pages(), I stumbled across this debug definition of free_pages() that is to be used while debugging some raid code in userspace. The __get_free_pages() allocates the correct size, but the free_pages() does not match. free_pages(), like __get_free_pages(), takes an order and not a size. Acked-by: H. Peter Anvin Signed-off-by: Steven Rostedt Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 2b59cc824395..53272e9860a7 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -132,7 +132,7 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, PROT_READ|PROT_WRITE, \ MAP_PRIVATE|MAP_ANONYMOUS,\ 0, 0)) -# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE) +# define free_pages(x, y) munmap((void *)(x), PAGE_SIZE << (y)) static inline void cpu_relax(void) { -- cgit v1.2.3 From 2d78f8c451785f030ac1676a18691896b59c69d8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:51 +1100 Subject: md: create externally visible flags for supporting hot-replace. hot-replace is a feature being added to md which will allow a device to be replaced without removing it from the array first. With hot-replace a spare can be activated and recovery can start while the original device is still in place, thus allowing a transition from an unreliable device to a reliable device without leaving the array degraded during the transition. It can also be use when the original device is still reliable but it not wanted for some reason. This will eventually be supported in RAID4/5/6 and RAID10. This patch adds a super-block flag to distinguish the replacement device. If an old kernel sees this flag it will reject the device. It also adds two per-device flags which are viewable and settable via sysfs. "want_replacement" can be set to request that a device be replaced. "replacement" is set to show that this device is replacing another device. The "rd%d" links in /sys/block/mdXx/md only apply to the original device, not the replacement. We currently don't make links for the replacement - there doesn't seem to be a need. Signed-off-by: NeilBrown --- include/linux/raid/md_p.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 9e65d9e20662..6f6df86f1ae5 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -277,7 +277,10 @@ struct mdp_superblock_1 { */ #define MD_FEATURE_RESHAPE_ACTIVE 4 #define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ - -#define MD_FEATURE_ALL (1|2|4|8) +#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an + * active device with same 'role'. + * 'recovery_offset' is also set. + */ +#define MD_FEATURE_ALL (1|2|4|8|16) #endif -- cgit v1.2.3 From 3d058d7bc2c5671ae630e0b463be8a69b5783fb9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 18 Dec 2011 01:55:54 +0100 Subject: netfilter: rework user-space expectation helper support This partially reworks bc01befdcf3e40979eb518085a075cbf0aacede0 which added userspace expectation support. This patch removes the nf_ct_userspace_expect_list since now we force to use the new iptables CT target feature to add the helper extension for conntracks that have attached expectations from userspace. A new version of the proof-of-concept code to implement userspace helpers from userspace is available at: http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-POC.tar.bz2 This patch also modifies the CT target to allow to set the conntrack's userspace helper status flags. This flag is used to tell the conntrack system to explicitly allocate the helper extension. This helper extension is useful to link the userspace expectations with the master conntrack that is being tracked from one userspace helper. This feature fixes a problem in the current approach of the userspace helper support. Basically, if the master conntrack that has got a userspace expectation vanishes, the expectations point to one invalid memory address. Thus, triggering an oops in the expectation deletion event path. I decided not to add a new revision of the CT target because I only needed to add a new flag for it. I'll document in this issue in the iptables manpage. I have also changed the return value from EINVAL to EOPNOTSUPP if one flag not supported is specified. Thus, in the future adding new features that only require a new flag can be added without a new revision. There is no official code using this in userspace (apart from the proof-of-concept) that uses this infrastructure but there will be some by beginning 2012. Reported-by: Sam Roberts Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ include/linux/netfilter/xt_CT.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 0d3dd66322ec..9e3a2838291b 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -83,6 +83,10 @@ enum ip_conntrack_status { /* Conntrack is a fake untracked entry */ IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), + + /* Conntrack has a userspace helper. */ + IPS_USERSPACE_HELPER_BIT = 13, + IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT), }; /* Connection tracking event types */ diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index b56e76811c04..6390f0992f36 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h @@ -3,7 +3,8 @@ #include -#define XT_CT_NOTRACK 0x1 +#define XT_CT_NOTRACK 0x1 +#define XT_CT_USERSPACE_HELPER 0x2 struct xt_ct_target_info { __u16 flags; -- cgit v1.2.3 From cbc9f2f4fcd70d5a627558ca9a881fa9391abf69 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 23 Dec 2011 13:59:49 +0100 Subject: netfilter: nf_nat: export NAT definitions to userspace Export the NAT definitions to userspace. So far userspace (specifically, iptables) has been copying the headers files from include/net. Also rename some structures and definitions in preparation for IPv6 NAT. Since these have never been officially exported, this doesn't affect existing userspace code. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + .../linux/netfilter/nf_conntrack_tuple_common.h | 27 ++++++++++ include/linux/netfilter/nf_nat.h | 25 ++++++++++ include/linux/netfilter_ipv4/Kbuild | 1 - include/linux/netfilter_ipv4/nf_nat.h | 58 ---------------------- 5 files changed, 53 insertions(+), 59 deletions(-) create mode 100644 include/linux/netfilter/nf_nat.h delete mode 100644 include/linux/netfilter_ipv4/nf_nat.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index a1b410c76fc3..d81f7719b01c 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h index 2ea22b018a87..2f6bbc5b8125 100644 --- a/include/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/linux/netfilter/nf_conntrack_tuple_common.h @@ -7,6 +7,33 @@ enum ip_conntrack_dir { IP_CT_DIR_MAX }; +/* The protocol-specific manipulable parts of the tuple: always in + * network order + */ +union nf_conntrack_man_proto { + /* Add other protocols here. */ + __be16 all; + + struct { + __be16 port; + } tcp; + struct { + __be16 port; + } udp; + struct { + __be16 id; + } icmp; + struct { + __be16 port; + } dccp; + struct { + __be16 port; + } sctp; + struct { + __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ + } gre; +}; + #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) #endif /* _NF_CONNTRACK_TUPLE_COMMON_H */ diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h new file mode 100644 index 000000000000..8df2d13730b2 --- /dev/null +++ b/include/linux/netfilter/nf_nat.h @@ -0,0 +1,25 @@ +#ifndef _NETFILTER_NF_NAT_H +#define _NETFILTER_NF_NAT_H + +#include +#include + +#define NF_NAT_RANGE_MAP_IPS 1 +#define NF_NAT_RANGE_PROTO_SPECIFIED 2 +#define NF_NAT_RANGE_PROTO_RANDOM 4 +#define NF_NAT_RANGE_PERSISTENT 8 + +struct nf_nat_ipv4_range { + unsigned int flags; + __be32 min_ip; + __be32 max_ip; + union nf_conntrack_man_proto min; + union nf_conntrack_man_proto max; +}; + +struct nf_nat_ipv4_multi_range_compat { + unsigned int rangesize; + struct nf_nat_ipv4_range range[1]; +}; + +#endif /* _NETFILTER_NF_NAT_H */ diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index c3b45480ecf7..f9930c87fff3 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -12,4 +12,3 @@ header-y += ipt_ah.h header-y += ipt_ecn.h header-y += ipt_realm.h header-y += ipt_ttl.h -header-y += nf_nat.h diff --git a/include/linux/netfilter_ipv4/nf_nat.h b/include/linux/netfilter_ipv4/nf_nat.h deleted file mode 100644 index 7a861d09fc86..000000000000 --- a/include/linux/netfilter_ipv4/nf_nat.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _LINUX_NF_NAT_H -#define _LINUX_NF_NAT_H - -#include - -#define IP_NAT_RANGE_MAP_IPS 1 -#define IP_NAT_RANGE_PROTO_SPECIFIED 2 -#define IP_NAT_RANGE_PROTO_RANDOM 4 -#define IP_NAT_RANGE_PERSISTENT 8 - -/* The protocol-specific manipulable parts of the tuple. */ -union nf_conntrack_man_proto { - /* Add other protocols here. */ - __be16 all; - - struct { - __be16 port; - } tcp; - struct { - __be16 port; - } udp; - struct { - __be16 id; - } icmp; - struct { - __be16 port; - } dccp; - struct { - __be16 port; - } sctp; - struct { - __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ - } gre; -}; - -/* Single range specification. */ -struct nf_nat_range { - /* Set to OR of flags above. */ - unsigned int flags; - - /* Inclusive: network order. */ - __be32 min_ip, max_ip; - - /* Inclusive: network order */ - union nf_conntrack_man_proto min, max; -}; - -/* For backwards compat: don't use in modern code. */ -struct nf_nat_multi_range_compat { - unsigned int rangesize; /* Must be 1. */ - - /* hangs off end. */ - struct nf_nat_range range[1]; -}; - -#define nf_nat_multi_range nf_nat_multi_range_compat - -#endif -- cgit v1.2.3 From 9d4dde5215779f4099730194ad30624fdba3d8b2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 22 Dec 2011 23:39:14 +0000 Subject: net: only use a single page of slop in MAX_SKB_FRAGS In order to accommodate a 64K buffer we need 64K/PAGE_SIZE plus one more page in order to allow for a buffer which does not start on a page boundary. Signed-off-by: Ian Campbell Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 12e6fed73f8e..f47f0c3939f2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -128,13 +128,17 @@ struct sk_buff_head { struct sk_buff; -/* To allow 64K frame to be packed as single skb without frag_list. Since - * GRO uses frags we allocate at least 16 regardless of page size. +/* To allow 64K frame to be packed as single skb without frag_list we + * require 64K/PAGE_SIZE pages plus 1 additional page to allow for + * buffers which do not start on a page boundary. + * + * Since GRO uses frags we allocate at least 16 regardless of page + * size. */ -#if (65536/PAGE_SIZE + 2) < 16 +#if (65536/PAGE_SIZE + 1) < 16 #define MAX_SKB_FRAGS 16UL #else -#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) +#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif typedef struct skb_frag_struct skb_frag_t; -- cgit v1.2.3 From c87fb57346fc7653ace98769f148e0dcd88ac1ee Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 14 Dec 2011 23:43:16 +0100 Subject: ARM: 7235/1: irqdomain: export irq_domain_simple_ops for !CONFIG_OF irqdomain support is used in interrupt controller drivers that may not have device tree support but only need the basic HW->Linux irq translation. Rather than having each of these implement their own IRQ domain, allow them to use the simple ops. Acked-by: Thomas Gleixner Acked-by: Rob Herring Cc: Grant Likely Signed-off-by: Jamie Iles Signed-off-by: Russell King --- include/linux/irqdomain.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 99834e581b9e..bd4272b61a14 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -91,10 +91,11 @@ static inline unsigned int irq_domain_to_irq(struct irq_domain *d, extern void irq_domain_add(struct irq_domain *domain); extern void irq_domain_del(struct irq_domain *domain); + +extern struct irq_domain_ops irq_domain_simple_ops; #endif /* CONFIG_IRQ_DOMAIN */ #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ) -extern struct irq_domain_ops irq_domain_simple_ops; extern void irq_domain_add_simple(struct device_node *controller, int irq_base); extern void irq_domain_generate_simple(const struct of_device_id *match, u64 phys_base, unsigned int irq_start); -- cgit v1.2.3 From 60b778ce519625102d3f72a2071ea72a05e990ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Dec 2011 06:56:49 +0000 Subject: rfs: better sizing of dev_flow_table Aim of this patch is to provide full range of rps_flow_cnt on 64bit arches. Theorical limit on number of flows is 2^32 Fix some buggy RPS/RFS macros as well. Signed-off-by: Eric Dumazet CC: Tom Herbert CC: Xi Wang CC: Laurent Chavey Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 603730804da5..a776a675c0e5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -597,7 +597,7 @@ struct rps_map { struct rcu_head rcu; u16 cpus[0]; }; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) +#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) /* * The rps_dev_flow structure contains the mapping of a flow to a CPU, the @@ -621,7 +621,7 @@ struct rps_dev_flow_table { struct rps_dev_flow flows[0]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - (_num * sizeof(struct rps_dev_flow))) + ((_num) * sizeof(struct rps_dev_flow))) /* * The rps_sock_flow_table contains mappings of flows to the last CPU @@ -632,7 +632,7 @@ struct rps_sock_flow_table { u16 ents[0]; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - (_num * sizeof(u16))) + ((_num) * sizeof(u16))) #define RPS_NO_CPU 0xffff @@ -684,7 +684,7 @@ struct xps_map { struct rcu_head rcu; u16 queues[0]; }; -#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) +#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ / sizeof(u16)) -- cgit v1.2.3 From 9413902796f56f6209e19dd54e840ed46950612c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Dec 2011 14:19:50 +0100 Subject: netfilter: add extended accounting infrastructure over nfnetlink We currently have two ways to account traffic in netfilter: - iptables chain and rule counters: # iptables -L -n -v Chain INPUT (policy DROP 3 packets, 867 bytes) pkts bytes target prot opt in out source destination 8 1104 ACCEPT all -- lo * 0.0.0.0/0 0.0.0.0/0 - use flow-based accounting provided by ctnetlink: # conntrack -L tcp 6 431999 ESTABLISHED src=192.168.1.130 dst=212.106.219.168 sport=58152 dport=80 packets=47 bytes=7654 src=212.106.219.168 dst=192.168.1.130 sport=80 dport=58152 packets=49 bytes=66340 [ASSURED] mark=0 use=1 While trying to display real-time accounting statistics, we require to pool the kernel periodically to obtain this information. This is OK if the number of flows is relatively low. However, in case that the number of flows is huge, we can spend a considerable amount of cycles to iterate over the list of flows that have been obtained. Moreover, if we want to obtain the sum of the flow accounting results that match some criteria, we have to iterate over the whole list of existing flows, look for matchings and update the counters. This patch adds the extended accounting infrastructure for nfnetlink which aims to allow displaying real-time traffic accounting without the need of complicated and resource-consuming implementation in user-space. Basically, this new infrastructure allows you to create accounting objects. One accounting object is composed of packet and byte counters. In order to manipulate create accounting objects, you require the new libnetfilter_acct library. It contains several examples of use: libnetfilter_acct/examples# ./nfacct-add http-traffic libnetfilter_acct/examples# ./nfacct-get http-traffic = { pkts = 000000000000, bytes = 000000000000 }; Then, you can use one of this accounting objects in several iptables rules using the new nfacct match (which comes in a follow-up patch): # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic The idea is simple: if one packet matches the rule, the nfacct match updates the counters. Thanks to Patrick McHardy, Eric Dumazet, Changli Gao for reviewing and providing feedback for this contribution. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 ++- include/linux/netfilter/nfnetlink_acct.h | 36 ++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/nfnetlink_acct.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index d81f7719b01c..6785246e6e62 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -7,6 +7,7 @@ header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h header-y += nf_nat.h header-y += nfnetlink.h +header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h header-y += nfnetlink_log.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 74d33861473c..b64454c2f79f 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -48,7 +48,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_ULOG 4 #define NFNL_SUBSYS_OSF 5 #define NFNL_SUBSYS_IPSET 6 -#define NFNL_SUBSYS_COUNT 7 +#define NFNL_SUBSYS_ACCT 7 +#define NFNL_SUBSYS_COUNT 8 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h new file mode 100644 index 000000000000..7c4279b4ae7a --- /dev/null +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -0,0 +1,36 @@ +#ifndef _NFNL_ACCT_H_ +#define _NFNL_ACCT_H_ + +#ifndef NFACCT_NAME_MAX +#define NFACCT_NAME_MAX 32 +#endif + +enum nfnl_acct_msg_types { + NFNL_MSG_ACCT_NEW, + NFNL_MSG_ACCT_GET, + NFNL_MSG_ACCT_GET_CTRZERO, + NFNL_MSG_ACCT_DEL, + NFNL_MSG_ACCT_MAX +}; + +enum nfnl_acct_type { + NFACCT_UNSPEC, + NFACCT_NAME, + NFACCT_PKTS, + NFACCT_BYTES, + NFACCT_USE, + __NFACCT_MAX +}; +#define NFACCT_MAX (__NFACCT_MAX - 1) + +#ifdef __KERNEL__ + +struct nf_acct; + +extern struct nf_acct *nfnl_acct_find_get(const char *filter_name); +extern void nfnl_acct_put(struct nf_acct *acct); +extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); + +#endif /* __KERNEL__ */ + +#endif /* _NFNL_ACCT_H */ -- cgit v1.2.3 From ceb98d03eac5704820f2ac1f370c9ff385e3a9f5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Dec 2011 14:28:59 +0100 Subject: netfilter: xtables: add nfacct match to support extended accounting This patch adds the match that allows to perform extended accounting. It requires the new nfnetlink_acct infrastructure. # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_nfacct.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 include/linux/netfilter/xt_nfacct.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 6785246e6e62..e630a2ed4f18 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -23,6 +23,7 @@ header-y += xt_DSCP.h header-y += xt_IDLETIMER.h header-y += xt_LED.h header-y += xt_MARK.h +header-y += xt_nfacct.h header-y += xt_NFLOG.h header-y += xt_NFQUEUE.h header-y += xt_RATEEST.h diff --git a/include/linux/netfilter/xt_nfacct.h b/include/linux/netfilter/xt_nfacct.h new file mode 100644 index 000000000000..3e19c8a86576 --- /dev/null +++ b/include/linux/netfilter/xt_nfacct.h @@ -0,0 +1,13 @@ +#ifndef _XT_NFACCT_MATCH_H +#define _XT_NFACCT_MATCH_H + +#include + +struct nf_acct; + +struct xt_nfacct_match_info { + char name[NFACCT_NAME_MAX]; + struct nf_acct *nfacct; +}; + +#endif /* _XT_NFACCT_MATCH_H */ -- cgit v1.2.3 From 0f966d74cf77a9140a025464a287e1d2fee8a1fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:30 +0100 Subject: PM / shmobile: Don't include SH7372's INTCS in syscore suspend/resume Since the SH7372's INTCS in included into syscore suspend/resume, which causes the chip to be accessed when PM domains have been turned off during system suspend, the A4R domain containing the INTCS has to stay on during system sleep, which is suboptimal from the power consumption point of view. For this reason, add a new INTC flag, skip_syscore_suspend, to mark the INTCS for intc_suspend() and intc_resume(), so that they don't touch it. This allows the A4R domain to be turned off during system suspend and the INTCS state is resrored during system resume by the A4R's "power on" code. Suggested-by: Magnus Damm Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- include/linux/sh_intc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 5812fefbcedf..b160645f5599 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -95,6 +95,7 @@ struct intc_desc { unsigned int num_resources; intc_enum force_enable; intc_enum force_disable; + bool skip_syscore_suspend; struct intc_hw_desc hw; }; -- cgit v1.2.3 From 40a5f8be2f482783de0f1f0fe856660e489734a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:52 +0100 Subject: PM / QoS: Introduce dev_pm_qos_add_ancestor_request() Some devices, like the I2C controller on SH7372, are not necessary for providing power to their children or forwarding wakeup signals (and generally interrupts) from them. They are only needed by their children when there's some data to transfer, so they may be suspended for the majority of time and resumed on demand, when the children have data to send or receive. For this purpose, however, their power.ignore_children flags have to be set, or the PM core wouldn't allow them to be suspended while their children were active. Unfortunately, in some situations it may take too much time to resume such devices so that they can assist their children in transferring data. For example, if such a device belongs to a PM domain which goes to the "power off" state when that device is suspended, it may take too much time to restore power to the domain in response to the request from one of the device's children. In that case, if the parent's resume time is critical, the domain should stay in the "power on" state, although it still may be desirable to power manage the parent itself (e.g. by manipulating its clock). In general, device PM QoS may be used to address this problem. Namely, if the device's children added PM QoS latency constraints for it, they would be able to prevent it from being put into an overly deep low-power state. However, in some cases the devices needing to be serviced are not the immediate children of a "children-ignoring" device, but its grandchildren or even less direct descendants. In those cases, the entity wanting to add a PM QoS request for a given device's ancestor that ignores its children will have to find it in the first place, so introduce a new helper function that may be used to achieve that. This function, dev_pm_qos_add_ancestor_request(), will search for the first ancestor of the given device whose power.ignore_children flag is set and will add a device PM QoS latency request for that ancestor on behalf of the caller. The request added this way may be removed with the help of dev_pm_qos_remove_request() in the future, like any other device PM QoS latency request. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea302a80..fe247b33652d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -91,6 +91,8 @@ int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); +int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value); #else static inline int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, @@ -150,6 +152,9 @@ static inline void dev_pm_qos_constraints_destroy(struct device *dev) { dev->power.power_state = PMSG_INVALID; } +static inline int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value) + { return 0; } #endif #endif -- cgit v1.2.3 From 4d25a066b69fb749a39d0d4c610689dd765a0b0e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 21 Dec 2011 12:28:29 +0100 Subject: KVM: Don't automatically expose the TSC deadline timer in cpuid Unlike all of the other cpuid bits, the TSC deadline timer bit is set unconditionally, regardless of what userspace wants. This is broken in several ways: - if userspace doesn't use KVM_CREATE_IRQCHIP, and doesn't emulate the TSC deadline timer feature, a guest that uses the feature will break - live migration to older host kernels that don't support the TSC deadline timer will cause the feature to be pulled from under the guest's feet; breaking it - guests that are broken wrt the feature will fail. Fix by not enabling the feature automatically; instead report it to userspace. Because the feature depends on KVM_CREATE_IRQCHIP, which we cannot guarantee will be called, we expose it via a KVM_CAP_TSC_DEADLINE_TIMER and not KVM_GET_SUPPORTED_CPUID. Fixes the Illumos guest kernel, which uses the TSC deadline timer feature. [avi: add the KVM_CAP + documentation] Reported-by: Alexey Zaytsev Tested-by: Alexey Zaytsev Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c3892fc1d538..68e67e50d028 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_S390_GMAP 71 +#define KVM_CAP_TSC_DEADLINE_TIMER 72 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From b3b73ec0d7fe5bf8f950232aa58dfa0416a62372 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Dec 2011 00:29:55 +0100 Subject: PM / Freezer: fix return value of freezable_schedule_timeout_killable() ...it should return the return code from schedule_timeout_killable(), not the one from freezer_count(). All of the current callers ignore the return code so the bug is harmless but it's worth fixing. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7bcfe73d999b..0ab54e16a91f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -116,9 +116,11 @@ static inline int freezer_should_skip(struct task_struct *p) /* Like schedule_timeout_killable(), but should not block the freezer. */ #define freezable_schedule_timeout_killable(timeout) \ ({ \ + long __retval; \ freezer_do_not_count(); \ - schedule_timeout_killable(timeout); \ + __retval = schedule_timeout_killable(timeout); \ freezer_count(); \ + __retval; \ }) /* -- cgit v1.2.3 From 7b482c8360d368fd495685a2c69ca4f1e7b29764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 20 Dec 2011 22:56:45 +0100 Subject: ARM/of: allow *machine_desc.dt_compat to be const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows dt_compat to point to a constant list of compatible strings. Signed-off-by: Uwe Kleine-König Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index c84d900fbbb3..ed136ad698ce 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -71,7 +71,7 @@ extern int of_fdt_is_compatible(struct boot_param_header *blob, unsigned long node, const char *compat); extern int of_fdt_match(struct boot_param_header *blob, unsigned long node, - const char **compat); + const char *const *compat); extern void of_fdt_unflatten_tree(unsigned long *blob, struct device_node **mynodes); @@ -88,7 +88,7 @@ extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); -extern int of_flat_dt_match(unsigned long node, const char **matches); +extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, -- cgit v1.2.3 From 8af0da93da7c40526959ab5291964581c678d3e7 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Thu, 22 Dec 2011 20:19:24 +0800 Subject: dt: reform for_each_property to for_each_property_of_node Make this macro easier to use(do not need to pass properties, a node is enough), also change to a more sensible name as for_each_child_of_node. Signed-off-by: Dong Aisheng Cc: Grant Likely Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 4948552d60f5..f1a490c37e06 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -219,8 +219,8 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); -#define for_each_property(pp, properties) \ - for (pp = properties; pp != NULL; pp = pp->next) +#define for_each_property_of_node(dn, pp) \ + for (pp = dn->properties; pp != NULL; pp = pp->next) extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); -- cgit v1.2.3 From d446a8202c81d95f91b1682fc67e7fadd9a31389 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 9 Jun 2011 21:03:07 +0200 Subject: netfilter: xtables: move ipt_ecn to xt_ecn Prepare the ECN match for augmentation by an IPv6 counterpart. Since no symbol dependencies to ipv6.ko are added, having a single ecn match module is the more so welcome. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_ecn.h | 35 ++++++++++++++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_ecn.h | 31 +----------------------------- 3 files changed, 37 insertions(+), 30 deletions(-) create mode 100644 include/linux/netfilter/xt_ecn.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index e630a2ed4f18..e144f54185c0 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -43,6 +43,7 @@ header-y += xt_cpu.h header-y += xt_dccp.h header-y += xt_devgroup.h header-y += xt_dscp.h +header-y += xt_ecn.h header-y += xt_esp.h header-y += xt_hashlimit.h header-y += xt_helper.h diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h new file mode 100644 index 000000000000..065c1a537e5d --- /dev/null +++ b/include/linux/netfilter/xt_ecn.h @@ -0,0 +1,35 @@ +/* iptables module for matching the ECN header in IPv4 and TCP header + * + * (C) 2002 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * + * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp +*/ +#ifndef _XT_ECN_H +#define _XT_ECN_H + +#include +#include + +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) + +#define IPT_ECN_OP_MATCH_IP 0x01 +#define IPT_ECN_OP_MATCH_ECE 0x10 +#define IPT_ECN_OP_MATCH_CWR 0x20 + +#define IPT_ECN_OP_MATCH_MASK 0xce + +/* match info */ +struct ipt_ecn_info { + __u8 operation; + __u8 invert; + __u8 ip_ect; + union { + struct { + __u8 ect; + } tcp; + } proto; +}; + +#endif /* _XT_ECN_H */ diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index eabf95fb7d3e..b1124ec76190 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -1,35 +1,6 @@ -/* iptables module for matching the ECN header in IPv4 and TCP header - * - * (C) 2002 Harald Welte - * - * This software is distributed under GNU GPL v2, 1991 - * - * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp -*/ #ifndef _IPT_ECN_H #define _IPT_ECN_H -#include -#include - -#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) - -#define IPT_ECN_OP_MATCH_IP 0x01 -#define IPT_ECN_OP_MATCH_ECE 0x10 -#define IPT_ECN_OP_MATCH_CWR 0x20 - -#define IPT_ECN_OP_MATCH_MASK 0xce - -/* match info */ -struct ipt_ecn_info { - __u8 operation; - __u8 invert; - __u8 ip_ect; - union { - struct { - __u8 ect; - } tcp; - } proto; -}; +#include #endif /* _IPT_ECN_H */ -- cgit v1.2.3 From a4c6f9d3636db538025f9622c008192a0835cc23 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 9 Jun 2011 21:15:37 +0200 Subject: netfilter: xtables: give xt_ecn its own name Use the new macro and struct names in xt_ecn.h, and put the old definitions into a definition-forwarding ipt_ecn.h. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_ecn.h | 12 ++++++------ include/linux/netfilter_ipv4/ipt_ecn.h | 11 ++++++++++- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h index 065c1a537e5d..7158fca364f2 100644 --- a/include/linux/netfilter/xt_ecn.h +++ b/include/linux/netfilter/xt_ecn.h @@ -12,16 +12,16 @@ #include #include -#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) +#define XT_ECN_IP_MASK (~XT_DSCP_MASK) -#define IPT_ECN_OP_MATCH_IP 0x01 -#define IPT_ECN_OP_MATCH_ECE 0x10 -#define IPT_ECN_OP_MATCH_CWR 0x20 +#define XT_ECN_OP_MATCH_IP 0x01 +#define XT_ECN_OP_MATCH_ECE 0x10 +#define XT_ECN_OP_MATCH_CWR 0x20 -#define IPT_ECN_OP_MATCH_MASK 0xce +#define XT_ECN_OP_MATCH_MASK 0xce /* match info */ -struct ipt_ecn_info { +struct xt_ecn_info { __u8 operation; __u8 invert; __u8 ip_ect; diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index b1124ec76190..0e0c063dbf60 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -2,5 +2,14 @@ #define _IPT_ECN_H #include +#define ipt_ecn_info xt_ecn_info -#endif /* _IPT_ECN_H */ +enum { + IPT_ECN_IP_MASK = XT_ECN_IP_MASK, + IPT_ECN_OP_MATCH_IP = XT_ECN_OP_MATCH_IP, + IPT_ECN_OP_MATCH_ECE = XT_ECN_OP_MATCH_ECE, + IPT_ECN_OP_MATCH_CWR = XT_ECN_OP_MATCH_CWR, + IPT_ECN_OP_MATCH_MASK = XT_ECN_OP_MATCH_MASK, +}; + +#endif /* IPT_ECN_H */ -- cgit v1.2.3 From 3ecdd0515287afbcde352077d59e4028dcfbb685 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Dec 2011 09:13:54 -0600 Subject: dt: add empty of_get_node/of_put_node functions Add empty of_get_node/of_put_node functions for !CONFIG_OF builds. Signed-off-by: Rob Herring Acked-by: Grant Likely --- include/linux/of.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index f1a490c37e06..9abd3ec3c2ac 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -65,6 +65,20 @@ struct device_node { #endif }; +#if defined(CONFIG_SPARC) || !defined(CONFIG_OF) +/* Dummy ref counting routines - to be implemented later */ +static inline struct device_node *of_node_get(struct device_node *node) +{ + return node; +} +static inline void of_node_put(struct device_node *node) +{ +} +#else +extern struct device_node *of_node_get(struct device_node *node); +extern void of_node_put(struct device_node *node); +#endif + #ifdef CONFIG_OF /* Pointer for first entry in chain of all nodes. */ @@ -95,21 +109,6 @@ static inline void of_node_set_flag(struct device_node *n, unsigned long flag) extern struct device_node *of_find_all_nodes(struct device_node *prev); -#if defined(CONFIG_SPARC) -/* Dummy ref counting routines - to be implemented later */ -static inline struct device_node *of_node_get(struct device_node *node) -{ - return node; -} -static inline void of_node_put(struct device_node *node) -{ -} - -#else -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); -#endif - /* * OF address retrieval & translation */ -- cgit v1.2.3 From e6fe2371bdd3713d0b227e9cd7f905e127ff81a0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:52:21 +0000 Subject: sock_diag: Arrange sock_diag.h such that it is exportable to userspace Properly toss existing components around the ifdef __KERNEL__ and include the header into the header-y target. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/sock_diag.h | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 0b091b32267d..8e484d660bc3 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -195,6 +195,7 @@ header-y += igmp.h header-y += in.h header-y += in6.h header-y += in_route.h +header-y += sock_diag.h header-y += inet_diag.h header-y += inotify.h header-y += input.h diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 379d5dccf8e1..66bc18ef4fa4 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,16 +1,19 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ -#define SOCK_DIAG_BY_FAMILY 20 +#include -struct sk_buff; -struct nlmsghdr; +#define SOCK_DIAG_BY_FAMILY 20 struct sock_diag_req { __u8 sdiag_family; __u8 sdiag_protocol; }; +#ifdef __KERNEL__ +struct sk_buff; +struct nlmsghdr; + struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); @@ -26,4 +29,5 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); extern struct sock *sock_diag_nlsk; +#endif /* KERNEL */ #endif -- cgit v1.2.3 From 288461e1546fa4162fa237eeed8ea09a16521dcd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:52:51 +0000 Subject: unix_diag: Include unix_diag.h into header-y target The headers check complains it should include the linux/types.h withing, thus add this one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/unix_diag.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 8e484d660bc3..c94e71781b79 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -197,6 +197,7 @@ header-y += in6.h header-y += in_route.h header-y += sock_diag.h header-y += inet_diag.h +header-y += unix_diag.h header-y += inotify.h header-y += input.h header-y += ioctl.h diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 3f7afb007d70..a5ce0f325745 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -1,6 +1,8 @@ #ifndef __UNIX_DIAG_H__ #define __UNIX_DIAG_H__ +#include + struct unix_diag_req { __u8 sdiag_family; __u8 sdiag_protocol; -- cgit v1.2.3 From 5d2e5f274f9e9a06fb934dd45260e2616a9992e6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:53:13 +0000 Subject: sock_diag: Introduce the meminfo nla core (v2) Add a routine that dumps memory-related values of a socket. It's made as an array to make it possible to add more stuff here later without breaking compatibility. Since v1: The SK_MEMINFO_ constants are in userspace visible part of sock_diag.h, the rest is under __KERNEL__. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 66bc18ef4fa4..251729a47880 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -10,9 +10,22 @@ struct sock_diag_req { __u8 sdiag_protocol; }; +enum { + SK_MEMINFO_RMEM_ALLOC, + SK_MEMINFO_RCVBUF, + SK_MEMINFO_WMEM_ALLOC, + SK_MEMINFO_SNDBUF, + SK_MEMINFO_FWD_ALLOC, + SK_MEMINFO_WMEM_QUEUED, + SK_MEMINFO_OPTMEM, + + SK_MEMINFO_VARS, +}; + #ifdef __KERNEL__ struct sk_buff; struct nlmsghdr; +struct sock; struct sock_diag_handler { __u8 family; @@ -28,6 +41,8 @@ void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlms int sock_diag_check_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); +int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); + extern struct sock *sock_diag_nlsk; #endif /* KERNEL */ #endif -- cgit v1.2.3 From c0636faa539ec4205ec50e80844a5b0454b4bbaa Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:53:32 +0000 Subject: inet_diag: Add the SKMEMINFO extension Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index afa5d5c74169..34e8d52c1925 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -108,9 +108,10 @@ enum { INET_DIAG_CONG, INET_DIAG_TOS, INET_DIAG_TCLASS, + INET_DIAG_SKMEMINFO, }; -#define INET_DIAG_MAX INET_DIAG_TCLASS +#define INET_DIAG_MAX INET_DIAG_SKMEMINFO /* INET_DIAG_MEM */ -- cgit v1.2.3 From 257b529876cb45ec791eaa89e3d2ee0d16b49383 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 09:27:43 +0000 Subject: unix_diag: Add the MEMINFO extension [ Fix indentation of sock_diag*() calls. -DaveM ] Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index a5ce0f325745..93fdb782468a 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -18,6 +18,7 @@ struct unix_diag_req { #define UDIAG_SHOW_PEER 0x00000004 /* show peer socket info */ #define UDIAG_SHOW_ICONS 0x00000008 /* show pending connections */ #define UDIAG_SHOW_RQLEN 0x00000010 /* show skb receive queue len */ +#define UDIAG_SHOW_MEMINFO 0x00000020 /* show memory info of a socket */ struct unix_diag_msg { __u8 udiag_family; @@ -35,6 +36,7 @@ enum { UNIX_DIAG_PEER, UNIX_DIAG_ICONS, UNIX_DIAG_RQLEN, + UNIX_DIAG_MEMINFO, UNIX_DIAG_MAX, }; -- cgit v1.2.3 From c9da99e6475f92653139e43f3c30c0cd011a0fd8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:54:39 +0000 Subject: unix_diag: Fixup RQLEN extension report While it's not too late fix the recently added RQLEN diag extension to report rqlen and wqlen in the same way as TCP does. I.e. for listening sockets the ack backlog length (which is the input queue length for socket) in rqlen and the max ack backlog length in wqlen, and what the CINQ/OUTQ ioctls do for established. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/unix_diag.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index 93fdb782468a..b1d2bf16b33c 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -46,4 +46,9 @@ struct unix_diag_vfs { __u32 udiag_vfs_dev; }; +struct unix_diag_rqlen { + __u32 udiag_rqueue; + __u32 udiag_wqueue; +}; + #endif -- cgit v1.2.3 From 30e053248da178cf6154bb7e950dc8713567e3fa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jan 2012 13:14:29 +0100 Subject: security: Fix security_old_inode_init_security() when CONFIG_SECURITY is not set Commit 1e39f384bb01 ("evm: fix build problems") makes the stub version of security_old_inode_init_security() return 0 when CONFIG_SECURITY is not set. But that makes callers such as reiserfs_security_init() assume that security_old_inode_init_security() has set name, value, and len arguments properly - but security_old_inode_init_security() left them uninitialized which then results in interesting failures. Revert security_old_inode_init_security() to the old behavior of returning EOPNOTSUPP since both callers (reiserfs and ocfs2) handle this just fine. [ Also fixed the S_PRIVATE(inode) case of the actual non-stub security_old_inode_init_security() function to return EOPNOTSUPP for the same reason, as pointed out by Mimi Zohar. It got incorrectly changed to match the new function in commit fb88c2b6cbb1: "evm: fix security/security_old_init_security return code". - Linus ] Reported-by: Jorge Bastos Acked-by: James Morris Acked-by: Mimi Zohar Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 19d8e04e1688..e8c619d39291 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2056,7 +2056,7 @@ static inline int security_old_inode_init_security(struct inode *inode, char **name, void **value, size_t *len) { - return 0; + return -EOPNOTSUPP; } static inline int security_inode_create(struct inode *dir, -- cgit v1.2.3 From 43c6759e73907e4c8e6624f70f5c4a860518b203 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 3 Jan 2012 20:23:18 -0500 Subject: net: phy: smsc: Move SMSC PHY constants to SMSC generation 4 LAN chips integrate an IEEE 802.3 ethernet physical layer. The ethernet driver for this family of devices needs to access the SMSC PHY registers and bit-fields. So, this patch moves these constants to a place where it can be used for both the PHY and LAN drivers. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- include/linux/smscphy.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/smscphy.h (limited to 'include/linux') diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h new file mode 100644 index 000000000000..ce718cbce435 --- /dev/null +++ b/include/linux/smscphy.h @@ -0,0 +1,25 @@ +#ifndef __LINUX_SMSCPHY_H__ +#define __LINUX_SMSCPHY_H__ + +#define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */ +#define MII_LAN83C185_IM 30 /* Interrupt Mask */ +#define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */ + +#define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */ +#define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */ +#define MII_LAN83C185_ISF_INT3 (1<<3) /* Auto-Negotiation LP Ack */ +#define MII_LAN83C185_ISF_INT4 (1<<4) /* Link Down */ +#define MII_LAN83C185_ISF_INT5 (1<<5) /* Remote Fault Detected */ +#define MII_LAN83C185_ISF_INT6 (1<<6) /* Auto-Negotiation complete */ +#define MII_LAN83C185_ISF_INT7 (1<<7) /* ENERGYON */ + +#define MII_LAN83C185_ISF_INT_ALL (0x0e) + +#define MII_LAN83C185_ISF_INT_PHYLIB_EVENTS \ + (MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4 | \ + MII_LAN83C185_ISF_INT7) + +#define MII_LAN83C185_EDPWRDOWN (1 << 13) /* EDPWRDOWN */ +#define MII_LAN83C185_ENERGYON (1 << 1) /* ENERGYON */ + +#endif /* __LINUX_SMSCPHY_H__ */ -- cgit v1.2.3 From 5ede7b1cfa8201418fb35e12f770e9e7c2559a4d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Oct 2011 18:49:54 -0400 Subject: pull manipulations of rpc_cred inside alloc_nfs_open_context() No need to duplicate them in both callers; make it return ERR_PTR(-ENOMEM) on allocation failure instead of NULL and it'll be able to report rpc_lookup_cred() failures just fine. Callers are much happier that way... Signed-off-by: Al Viro --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 92ecf5585fac..8c29950d2fa5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -373,7 +373,7 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode); -extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode); +extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode); extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx); extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); -- cgit v1.2.3 From 6c449c8dfe30142b3ced5f052e8ed3cce308801a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Nov 2011 22:01:36 -0500 Subject: unexport put_mnt_ns(), make create_mnt_ns() static outright Signed-off-by: Al Viro --- include/linux/mnt_namespace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 29304855652d..e87ec01aac9d 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,7 +22,6 @@ struct proc_mounts { struct fs_struct; -extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt); extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -- cgit v1.2.3 From a5166169f9b920cae3c503910cb66a3ac5dd846d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Dec 2011 22:53:00 -0500 Subject: vfs: convert fs_supers to hlist Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e0bc4ffb8e7f..ed17e54fd204 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1440,7 +1440,7 @@ struct super_block { struct block_device *s_bdev; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; - struct list_head s_instances; + struct hlist_node s_instances; struct quota_info s_dquot; /* Diskquota specific options */ int s_frozen; @@ -1864,7 +1864,7 @@ struct file_system_type { void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; - struct list_head fs_supers; + struct hlist_head fs_supers; struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; -- cgit v1.2.3 From 79e801a906db46cb8efad66c400b01df874b3f12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Dec 2011 23:07:05 -0500 Subject: vfs: make do_kern_mount() static the only user outside of fs/namespace.c has died Signed-off-by: Al Viro --- include/linux/mount.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 33fe53d78110..65c1bb013836 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -100,9 +100,6 @@ extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -extern struct vfsmount *do_kern_mount(const char *fstype, int flags, - const char *name, void *data); - struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, -- cgit v1.2.3 From 8c9379e972e984d11c2b99121847ba9fa7a0c56c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 20:18:57 -0500 Subject: constify seq_file stuff Signed-off-by: Al Viro --- include/linux/seq_file.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0b69a4684216..44f1514b00ba 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -74,7 +74,7 @@ static inline void seq_commit(struct seq_file *m, int num) } } -char *mangle_path(char *s, char *p, char *esc); +char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); @@ -86,10 +86,10 @@ int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); -int seq_path(struct seq_file *, struct path *, char *); -int seq_dentry(struct seq_file *, struct dentry *, char *); -int seq_path_root(struct seq_file *m, struct path *path, struct path *root, - char *esc); +int seq_path(struct seq_file *, const struct path *, const char *); +int seq_dentry(struct seq_file *, struct dentry *, const char *); +int seq_path_root(struct seq_file *m, const struct path *path, + const struct path *root, const char *esc); int seq_bitmap(struct seq_file *m, const unsigned long *bits, unsigned int nr_bits); static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) -- cgit v1.2.3 From 2a79f17e4a641a2f463cb512cb0ec349844a147b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Dec 2011 08:06:57 -0500 Subject: vfs: mnt_drop_write_file() new helper (wrapper around mnt_drop_write()) to be used in pair with mnt_want_write_file(). Signed-off-by: Al Viro --- include/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 65c1bb013836..00f5c4f2160b 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -94,6 +94,7 @@ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); +extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); -- cgit v1.2.3 From cf31e70d6cf93f19fe9bf1144966ef40991ac723 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Jan 2012 22:28:36 -0500 Subject: vfs: new helper - vfs_ustat() ... and bury user_get_super()/statfs_by_dentry() - they are purely internal now. Signed-off-by: Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed17e54fd204..cec429d76ab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1939,7 +1939,7 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, extern int vfs_statfs(struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -extern int statfs_by_dentry(struct dentry *, struct kstatfs *); +extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); @@ -2531,7 +2531,6 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); extern struct super_block *get_active_super(struct block_device *bdev); -extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, -- cgit v1.2.3 From ff01bb4832651c6d25ac509a06a10fcbd75c461c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Sep 2011 02:31:11 -0400 Subject: fs: move code out of buffer.c Move invalidate_bdev, block_sync_page into fs/block_dev.c. Export kill_bdev as well, so brd doesn't have to open code it. Reduce buffer_head.h requirement accordingly. Removed a rather large comment from invalidate_bdev, as it looked a bit obsolete to bother moving. The small comment replacing it says enough. Signed-off-by: Nick Piggin Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cec429d76ab0..e853ba5eddd4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2092,6 +2092,7 @@ extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); +extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); @@ -2099,6 +2100,7 @@ extern int fsync_bdev(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline void kill_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {} static inline struct super_block *freeze_bdev(struct block_device *sb) @@ -2415,6 +2417,7 @@ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); +extern void block_sync_page(struct page *page); /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, -- cgit v1.2.3 From 8208a22bb8bd3c52ef634b4ff194f14892ab1713 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jul 2011 17:32:17 -0400 Subject: switch sys_mknodat(2) to umode_t Signed-off-by: Al Viro --- include/linux/syscalls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 86a24b1166d1..b3c16d8a6383 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -475,7 +475,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *put_old); asmlinkage long sys_chroot(const char __user *filename); -asmlinkage long sys_mknod(const char __user *filename, int mode, +asmlinkage long sys_mknod(const char __user *filename, umode_t mode, unsigned dev); asmlinkage long sys_link(const char __user *oldname, const char __user *newname); @@ -755,7 +755,7 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, asmlinkage long sys_spu_create(const char __user *name, unsigned int flags, mode_t mode, int fd); -asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode, +asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, unsigned dev); asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode); asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); -- cgit v1.2.3 From 18bb1db3e7607e4a997d50991a6f9fa5b0f8722c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 01:41:39 -0400 Subject: switch vfs_mkdir() and ->mkdir() to umode_t vfs_mkdir() gets int, but immediately drops everything that might not fit into umode_t and that's the only caller of ->mkdir()... Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- include/linux/security.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cec429d76ab0..3f7bd8b12e37 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1517,7 +1517,7 @@ extern void unlock_super(struct super_block *); * VFS helper functions.. */ extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); +extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); @@ -1623,7 +1623,7 @@ struct inode_operations { int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); + int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,int,dev_t); int (*rename) (struct inode *, struct dentry *, diff --git a/include/linux/security.h b/include/linux/security.h index e8c619d39291..16cbc58cb13b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1453,7 +1453,7 @@ struct security_operations { int (*inode_unlink) (struct inode *dir, struct dentry *dentry); int (*inode_symlink) (struct inode *dir, struct dentry *dentry, const char *old_name); - int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode); + int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode); int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); int (*inode_mknod) (struct inode *dir, struct dentry *dentry, int mode, dev_t dev); @@ -1722,7 +1722,7 @@ int security_inode_link(struct dentry *old_dentry, struct inode *dir, int security_inode_unlink(struct inode *dir, struct dentry *dentry); int security_inode_symlink(struct inode *dir, struct dentry *dentry, const char *old_name); -int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode); +int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); int security_inode_rmdir(struct inode *dir, struct dentry *dentry); int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, -- cgit v1.2.3 From 4acdaf27ebe2034c342f3be57ef49aed1ad885ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 01:42:34 -0400 Subject: switch ->create() to umode_t vfs_create() ignores everything outside of 16bit subset of its mode argument; switching it to umode_t is obviously equivalent and it's the only caller of the method Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- include/linux/security.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f7bd8b12e37..e40321a6e239 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1516,7 +1516,7 @@ extern void unlock_super(struct super_block *); /* * VFS helper functions.. */ -extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); +extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); @@ -1619,7 +1619,7 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); void (*put_link) (struct dentry *, struct nameidata *, void *); - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/include/linux/security.h b/include/linux/security.h index 16cbc58cb13b..8fc22373db34 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1447,7 +1447,7 @@ struct security_operations { const struct qstr *qstr, char **name, void **value, size_t *len); int (*inode_create) (struct inode *dir, - struct dentry *dentry, int mode); + struct dentry *dentry, umode_t mode); int (*inode_link) (struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int (*inode_unlink) (struct inode *dir, struct dentry *dentry); @@ -1716,7 +1716,7 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, int security_old_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, char **name, void **value, size_t *len); -int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); +int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int security_inode_unlink(struct inode *dir, struct dentry *dentry); @@ -2061,7 +2061,7 @@ static inline int security_old_inode_init_security(struct inode *inode, static inline int security_inode_create(struct inode *dir, struct dentry *dentry, - int mode) + umode_t mode) { return 0; } -- cgit v1.2.3 From 1a67aafb5f72a436ca044293309fa7e6351d6a35 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 01:52:52 -0400 Subject: switch ->mknod() to umode_t Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- include/linux/security.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e40321a6e239..b89eef1d1752 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1518,7 +1518,7 @@ extern void unlock_super(struct super_block *); */ extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); +extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); @@ -1625,7 +1625,7 @@ struct inode_operations { int (*symlink) (struct inode *,struct dentry *,const char *); int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); + int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); void (*truncate) (struct inode *); diff --git a/include/linux/security.h b/include/linux/security.h index 8fc22373db34..0e5aeb86dfc4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ struct security_operations { int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode); int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); int (*inode_mknod) (struct inode *dir, struct dentry *dentry, - int mode, dev_t dev); + umode_t mode, dev_t dev); int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); @@ -1724,7 +1724,7 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry, const char *old_name); int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); int security_inode_rmdir(struct inode *dir, struct dentry *dentry); -int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); +int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev); int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int security_inode_readlink(struct dentry *dentry); -- cgit v1.2.3 From 2c9ede55ecec58099b72e4bb8eab719f32f72c31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 20:24:48 -0400 Subject: switch device_get_devnode() and ->devnode() to umode_t * both callers of device_get_devnode() are only interested in lower 16bits and nobody tries to return anything wider than 16bit anyway. Signed-off-by: Al Viro --- include/linux/device.h | 6 +++--- include/linux/genhd.h | 2 +- include/linux/usb.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 3136ede5a1e1..2fe0005543ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -294,7 +294,7 @@ struct class { struct kobject *dev_kobj; int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); @@ -423,7 +423,7 @@ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); void (*release)(struct device *dev); const struct dev_pm_ops *pm; @@ -720,7 +720,7 @@ extern int device_rename(struct device *dev, const char *new_name); extern int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); extern const char *device_get_devnode(struct device *dev, - mode_t *mode, const char **tmp); + umode_t *mode, const char **tmp); extern void *dev_get_drvdata(const struct device *dev); extern int dev_set_drvdata(struct device *dev, void *data); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6d18f3531f18..fe23ee768589 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -163,7 +163,7 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *(*devnode)(struct gendisk *gd, mode_t *mode); + char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned int events; /* supported events */ unsigned int async_events; /* async events, subset of all */ diff --git a/include/linux/usb.h b/include/linux/usb.h index d3d0c1374334..a59321779f8b 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -935,7 +935,7 @@ extern struct bus_type usb_bus_type; */ struct usb_class_driver { char *name; - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); const struct file_operations *fops; int minor_base; }; -- cgit v1.2.3 From 9104e427f3e21ddb380ddc39752624365b5bffea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 23:10:46 -0400 Subject: switch sysfs attr->mode to umode_t Signed-off-by: Al Viro --- include/linux/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index dac0859e6440..d1994ec02c89 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -25,7 +25,7 @@ enum kobj_ns_type; struct attribute { const char *name; - mode_t mode; + umode_t mode; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key *key; struct lock_class_key skey; -- cgit v1.2.3 From 587a1f1659e8b330b8738ef4901832a2b63f0bed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 23:11:19 -0400 Subject: switch ->is_visible() to returning umode_t Signed-off-by: Al Viro --- include/linux/iscsi_boot_sysfs.h | 8 ++++---- include/linux/sysfs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h index f0a2f8b0aa13..2a8b1659bf35 100644 --- a/include/linux/iscsi_boot_sysfs.h +++ b/include/linux/iscsi_boot_sysfs.h @@ -91,7 +91,7 @@ struct iscsi_boot_kobj { * The enum of the type. This can be any value of the above * properties. */ - mode_t (*is_visible) (void *data, int type); + umode_t (*is_visible) (void *data, int type); /* * Driver specific release function. @@ -110,20 +110,20 @@ struct iscsi_boot_kobj * iscsi_boot_create_initiator(struct iscsi_boot_kset *boot_kset, int index, void *data, ssize_t (*show) (void *data, int type, char *buf), - mode_t (*is_visible) (void *data, int type), + umode_t (*is_visible) (void *data, int type), void (*release) (void *data)); struct iscsi_boot_kobj * iscsi_boot_create_ethernet(struct iscsi_boot_kset *boot_kset, int index, void *data, ssize_t (*show) (void *data, int type, char *buf), - mode_t (*is_visible) (void *data, int type), + umode_t (*is_visible) (void *data, int type), void (*release) (void *data)); struct iscsi_boot_kobj * iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index, void *data, ssize_t (*show) (void *data, int type, char *buf), - mode_t (*is_visible) (void *data, int type), + umode_t (*is_visible) (void *data, int type), void (*release) (void *data)); struct iscsi_boot_kset *iscsi_boot_create_kset(const char *set_name); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d1994ec02c89..e90eea7afb4e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -55,7 +55,7 @@ do { \ struct attribute_group { const char *name; - mode_t (*is_visible)(struct kobject *, + umode_t (*is_visible)(struct kobject *, struct attribute *, int); struct attribute **attrs; }; -- cgit v1.2.3 From d161a13f974c72fd7ff0069d39a3ae57cb5694ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 03:36:29 -0400 Subject: switch procfs to umode_t use both proc_dir_entry ->mode and populating functions Signed-off-by: Al Viro --- include/linux/ide.h | 2 +- include/linux/proc_fs.h | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 42557851b12e..501370b61ee5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -920,7 +920,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) typedef struct { const char *name; - mode_t mode; + umode_t mode; const struct file_operations *proc_fops; } ide_proc_entry_t; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 643b96c7a94f..6d9e575519cc 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -50,7 +50,7 @@ typedef int (write_proc_t)(struct file *file, const char __user *buffer, struct proc_dir_entry { unsigned int low_ino; - mode_t mode; + umode_t mode; nlink_t nlink; uid_t uid; gid_t gid; @@ -106,9 +106,9 @@ extern void proc_root_init(void); void proc_flush_task(struct task_struct *task); -extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, +extern struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, struct proc_dir_entry *parent); -struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, +struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, void *data); @@ -146,17 +146,17 @@ extern void proc_device_tree_update_prop(struct proc_dir_entry *pde, extern struct proc_dir_entry *proc_symlink(const char *, struct proc_dir_entry *, const char *); extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *); -extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, +extern struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, struct proc_dir_entry *parent); -static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode, +static inline struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops) { return proc_create_data(name, mode, parent, proc_fops, NULL); } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, + umode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { struct proc_dir_entry *res=create_proc_entry(name,mode,base); @@ -168,7 +168,7 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, } extern struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, mode_t mode, const struct file_operations *fops); + const char *name, umode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); @@ -185,15 +185,15 @@ static inline void proc_flush_task(struct task_struct *task) } static inline struct proc_dir_entry *create_proc_entry(const char *name, - mode_t mode, struct proc_dir_entry *parent) { return NULL; } + umode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *proc_create(const char *name, - mode_t mode, struct proc_dir_entry *parent, + umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops) { return NULL; } static inline struct proc_dir_entry *proc_create_data(const char *name, - mode_t mode, struct proc_dir_entry *parent, + umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, void *data) { return NULL; @@ -205,10 +205,10 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, - mode_t mode, struct proc_dir_entry *parent) { return NULL; } + umode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, + umode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { return NULL; } struct tty_driver; -- cgit v1.2.3 From 48176a973d65572e61d0ce95495e5072887e6fb6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 03:40:40 -0400 Subject: switch sysfs_chmod_file() to umode_t Signed-off-by: Al Viro --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e90eea7afb4e..0010009b2f00 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -133,7 +133,7 @@ int __must_check sysfs_create_file(struct kobject *kobj, int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_chmod_file(struct kobject *kobj, - const struct attribute *attr, mode_t mode); + const struct attribute *attr, umode_t mode); void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr); @@ -221,7 +221,7 @@ static inline int sysfs_create_files(struct kobject *kobj, } static inline int sysfs_chmod_file(struct kobject *kobj, - const struct attribute *attr, mode_t mode) + const struct attribute *attr, umode_t mode) { return 0; } -- cgit v1.2.3 From f4ae40a6a50a98ac23d4b285f739455e926a473e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 04:33:43 -0400 Subject: switch debugfs to umode_t Signed-off-by: Al Viro --- include/linux/debugfs.h | 46 +++++++++++++++++++++++----------------------- include/linux/relay.h | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e7d9b20ddc5b..d1ac841e8dc7 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -34,7 +34,7 @@ extern struct dentry *arch_debugfs_dir; extern const struct file_operations debugfs_file_operations; extern const struct inode_operations debugfs_link_operations; -struct dentry *debugfs_create_file(const char *name, mode_t mode, +struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -49,28 +49,28 @@ void debugfs_remove_recursive(struct dentry *dentry); struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); -struct dentry *debugfs_create_u8(const char *name, mode_t mode, +struct dentry *debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); -struct dentry *debugfs_create_u16(const char *name, mode_t mode, +struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); -struct dentry *debugfs_create_u32(const char *name, mode_t mode, +struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value); -struct dentry *debugfs_create_u64(const char *name, mode_t mode, +struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); -struct dentry *debugfs_create_x8(const char *name, mode_t mode, +struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); -struct dentry *debugfs_create_x16(const char *name, mode_t mode, +struct dentry *debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); -struct dentry *debugfs_create_x32(const char *name, mode_t mode, +struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value); -struct dentry *debugfs_create_x64(const char *name, mode_t mode, +struct dentry *debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value); -struct dentry *debugfs_create_size_t(const char *name, mode_t mode, +struct dentry *debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value); -struct dentry *debugfs_create_bool(const char *name, mode_t mode, +struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, u32 *value); -struct dentry *debugfs_create_blob(const char *name, mode_t mode, +struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); @@ -86,7 +86,7 @@ bool debugfs_initialized(void); * want to duplicate the design decision mistakes of procfs and devfs again. */ -static inline struct dentry *debugfs_create_file(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { @@ -118,70 +118,70 @@ static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentr return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u8(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u16(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u32(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u64(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_x8(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_x16(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_x32(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_size_t(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_bool(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, u32 *value) { return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) { diff --git a/include/linux/relay.h b/include/linux/relay.h index 14a86bc7102b..a822fd71fd64 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -144,7 +144,7 @@ struct rchan_callbacks */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, - int mode, + umode_t mode, struct rchan_buf *buf, int *is_global); -- cgit v1.2.3 From 439475140bed762c04567c325d48409862341ae4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jul 2011 00:05:26 -0400 Subject: configfs: convert to umode_t Signed-off-by: Al Viro --- include/linux/configfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 3081c58d696e..34025df61829 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -124,7 +124,7 @@ extern struct config_item *config_group_find_item(struct config_group *, struct configfs_attribute { const char *ca_name; struct module *ca_owner; - mode_t ca_mode; + umode_t ca_mode; }; /* -- cgit v1.2.3 From 69b34f3ab30836bb736b5108f40bf76de9f656f3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 02:46:57 -0400 Subject: ext3: propagate umode_t Signed-off-by: Al Viro --- include/linux/ext3_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index dec99116a0e4..f957085d40ed 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -884,7 +884,7 @@ extern int ext3fs_dirhash(const char *name, int len, struct /* ialloc.c */ extern struct inode * ext3_new_inode (handle_t *, struct inode *, - const struct qstr *, int); + const struct qstr *, umode_t); extern void ext3_free_inode (handle_t *, struct inode *); extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); extern unsigned long ext3_count_free_inodes (struct super_block *); -- cgit v1.2.3 From 8e0718924e7d7eaf6104e54aeaeda477570e1e06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 02:50:53 -0400 Subject: reiserfs: propagate umode_t Signed-off-by: Al Viro --- include/linux/reiserfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 96d465f8d3e6..26be28fd7b76 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2056,7 +2056,7 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); struct reiserfs_security_handle; int reiserfs_new_inode(struct reiserfs_transaction_handle *th, - struct inode *dir, int mode, + struct inode *dir, umode_t mode, const char *symname, loff_t i_size, struct dentry *dentry, struct inode *inode, struct reiserfs_security_handle *security); -- cgit v1.2.3 From a5e7ed3287e45f2eafbcf9e7e6fdc5a0191acf40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 01:55:55 -0400 Subject: cgroup: propagate mode_t Signed-off-by: Al Viro --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1b7f9d525013..a17becc36ca1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -319,7 +319,7 @@ struct cftype { * If not 0, file mode is set to this value, otherwise it will * be figured out automatically */ - mode_t mode; + umode_t mode; /* * If non-zero, defines the maximum length of string that can -- cgit v1.2.3 From 64f1426f3c4f8dde9ac9bf3f3b19b88d17f2bae6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jul 2011 00:35:13 -0400 Subject: sunrpc: propagate umode_t Signed-off-by: Al Viro --- include/linux/sunrpc/cache.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5efd8cef389e..57531f8e5956 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -203,7 +203,7 @@ extern void cache_unregister(struct cache_detail *cd); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, - mode_t, struct cache_detail *); + umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); extern void qword_add(char **bpp, int *lp, char *str); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index e4ea43058d8f..2bb03d77375a 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -55,7 +55,7 @@ extern int rpc_remove_client_dir(struct dentry *); struct cache_detail; extern struct dentry *rpc_create_cache_dir(struct dentry *, struct qstr *, - mode_t umode, + umode_t umode, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); -- cgit v1.2.3 From 09208d150b5cda009b666238a7102cb45ecec2ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 03:15:03 -0400 Subject: shmem, ramfs: propagate umode_t, open-coded S_ISREG Signed-off-by: Al Viro --- include/linux/shmem_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 9291ac3cc627..e4c711c6f321 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -30,7 +30,7 @@ struct shmem_sb_info { spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ uid_t uid; /* Mount uid for root directory */ gid_t gid; /* Mount gid for root directory */ - mode_t mode; /* Mount mode for root directory */ + umode_t mode; /* Mount mode for root directory */ struct mempolicy *mpol; /* default memory policy for mappings */ }; -- cgit v1.2.3 From 632861f05a8e5878a267d173000880ceb608b56e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 03:16:55 -0400 Subject: pohmelfs: propagate umode_t Signed-off-by: Al Viro --- include/linux/ramfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 3a8f0c9b2933..5bf5500db83d 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -2,7 +2,7 @@ #define _LINUX_RAMFS_H struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, - int mode, dev_t dev); + umode_t mode, dev_t dev); extern struct dentry *ramfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); -- cgit v1.2.3 From 62bb109170375f82eb3c51c8080b72954f02dca7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 23:20:18 -0400 Subject: switch inode_init_owner() to umode_t Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b89eef1d1752..9db9f6e6c98b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1534,7 +1534,7 @@ extern void dentry_unhash(struct dentry *dentry); * VFS file helper functions. */ extern void inode_init_owner(struct inode *inode, const struct inode *dir, - mode_t mode); + umode_t mode); /* * VFS FS_IOC_FIEMAP helper definitions. */ -- cgit v1.2.3 From 8d334acdd2c1f57c7a574c6f24d08e4c95582ff0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 23:21:59 -0400 Subject: switch is_sxid() to umode_t Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9db9f6e6c98b..9d02fab420c6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2690,7 +2690,7 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) -static inline int is_sxid(mode_t mode) +static inline int is_sxid(umode_t mode) { return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } -- cgit v1.2.3 From 36fcb589e752fa9c71f8a447db94126d102fd937 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 03:47:31 -0400 Subject: sysctl: use umode_t for table permissions Signed-off-by: Al Viro --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 703cfa33a3ca..bb9127dd814b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1038,7 +1038,7 @@ struct ctl_table const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; - mode_t mode; + umode_t mode; struct ctl_table *child; struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ -- cgit v1.2.3 From 49f0a0767211d3076974e59a26f36b567cbe8621 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 04:22:01 -0400 Subject: switch sys_chmod()/sys_fchmod()/sys_fchmodat() to umode_t SYSCALLx magic should take care of things, according to Linus... Signed-off-by: Al Viro --- include/linux/syscalls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b3c16d8a6383..e1a4b9b81cf2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -483,8 +483,8 @@ asmlinkage long sys_symlink(const char __user *old, const char __user *new); asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_rename(const char __user *oldname, const char __user *newname); -asmlinkage long sys_chmod(const char __user *filename, mode_t mode); -asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); +asmlinkage long sys_chmod(const char __user *filename, umode_t mode); +asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); #if BITS_PER_LONG == 32 @@ -769,7 +769,7 @@ asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct timeval __user *utimes); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); asmlinkage long sys_fchmodat(int dfd, const char __user * filename, - mode_t mode); + umode_t mode); asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, -- cgit v1.2.3 From 910f4ecef3f67714ebff69d0bc34313e48afaed2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 04:25:58 -0400 Subject: switch security_path_chmod() to umode_t Signed-off-by: Al Viro --- include/linux/security.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 0e5aeb86dfc4..f2c1fd7978a5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1436,7 +1436,7 @@ struct security_operations { int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, - mode_t mode); + umode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); int (*path_chroot) (struct path *path); #endif @@ -2867,7 +2867,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, - mode_t mode); + umode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ @@ -2921,7 +2921,7 @@ static inline int security_path_rename(struct path *old_dir, static inline int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, - mode_t mode) + umode_t mode) { return 0; } -- cgit v1.2.3 From 52ef0c042bf06f6aef382fade175075627beebc1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 04:30:04 -0400 Subject: switch securityfs_create_file() to umode_t Signed-off-by: Al Viro --- include/linux/security.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f2c1fd7978a5..fab659edf11a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -3010,7 +3010,7 @@ static inline void security_audit_rule_free(void *lsmrule) #ifdef CONFIG_SECURITYFS -extern struct dentry *securityfs_create_file(const char *name, mode_t mode, +extern struct dentry *securityfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); @@ -3025,7 +3025,7 @@ static inline struct dentry *securityfs_create_dir(const char *name, } static inline struct dentry *securityfs_create_file(const char *name, - mode_t mode, + umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) -- cgit v1.2.3 From a85cfdaec935ede36be6c54f98878624b0d9fbad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 04:47:38 -0400 Subject: switch miscdevice to umode_t Signed-off-by: Al Viro --- include/linux/miscdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index c41d7270c6c6..32085249e9cb 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -54,7 +54,7 @@ struct miscdevice { struct device *parent; struct device *this_device; const char *nodename; - mode_t mode; + umode_t mode; }; extern int misc_register(struct miscdevice * misc); -- cgit v1.2.3 From df0a42837b86567a130c44515ab620d23e7f182b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 05:26:10 -0400 Subject: switch mq_open() to umode_t --- include/linux/audit.h | 4 ++-- include/linux/syscalls.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2f81c6f3b630..75ed193b11f8 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -474,7 +474,7 @@ extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); extern int audit_set_macxattr(const char *name); -extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr); +extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); @@ -499,7 +499,7 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } -static inline void audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) +static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { if (unlikely(!audit_dummy_context())) __audit_mq_open(oflag, mode, attr); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e1a4b9b81cf2..d86e5253f84f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -679,7 +679,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, unsigned long third, void __user *ptr, long fifth); -asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr); +asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); -- cgit v1.2.3 From 1bc94226d5c642b78cf6b2e3e843ef24eb740df0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 16:50:23 -0400 Subject: switch spu_create(2) to use of SYSCALL_DEFINE4, make it use umode_t Signed-off-by: Al Viro --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d86e5253f84f..b25621476316 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -753,7 +753,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, mode_t mode, int fd); + unsigned int flags, umode_t mode, int fd); asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, unsigned dev); -- cgit v1.2.3 From 0583fcc96bb117763c0fa74c123573c0112dec65 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jul 2011 17:04:15 -0400 Subject: consolidate umode_t declarations Signed-off-by: Al Viro --- include/linux/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 57a97234bec1..f0ac9bda0f78 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -24,6 +24,7 @@ typedef __kernel_fd_set fd_set; typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; +typedef unsigned short umode_t; typedef __kernel_nlink_t nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; -- cgit v1.2.3 From 2570ebbd1f1ce1ef31f568b0660354fc59424be2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jul 2011 14:03:22 -0400 Subject: switch kern_ipc_perm to umode_t Signed-off-by: Al Viro --- include/linux/audit.h | 4 ++-- include/linux/ipc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 75ed193b11f8..426ab9f4dd85 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -468,7 +468,7 @@ extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); -extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); +extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern int audit_bprm(struct linux_binprm *bprm); extern void audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); @@ -494,7 +494,7 @@ static inline void audit_fd_pair(int fd1, int fd2) if (unlikely(!audit_dummy_context())) __audit_fd_pair(fd1, fd2); } -static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) +static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 3b1594d662b0..30e816148df4 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -93,7 +93,7 @@ struct kern_ipc_perm gid_t gid; uid_t cuid; gid_t cgid; - mode_t mode; + umode_t mode; unsigned long seq; void *security; }; -- cgit v1.2.3 From 4572befe248fd0d94aedc98775e3f0ddc8a26651 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Nov 2011 14:56:21 -0500 Subject: switch ->path_mkdir() to umode_t Signed-off-by: Al Viro --- include/linux/security.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index fab659edf11a..24cd7cf48564 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1424,7 +1424,7 @@ struct security_operations { #ifdef CONFIG_SECURITY_PATH int (*path_unlink) (struct path *dir, struct dentry *dentry); - int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode); + int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); int (*path_rmdir) (struct path *dir, struct dentry *dentry); int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode, unsigned int dev); @@ -2855,7 +2855,7 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_SECURITY_PATH int security_path_unlink(struct path *dir, struct dentry *dentry); -int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode); +int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(struct path *dir, struct dentry *dentry); int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, unsigned int dev); @@ -2877,7 +2877,7 @@ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) } static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, - int mode) + umode_t mode) { return 0; } -- cgit v1.2.3 From 04fc66e789a896e684bfdca30208e57eb832dd96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Nov 2011 14:58:38 -0500 Subject: switch ->path_mknod() to umode_t Signed-off-by: Al Viro --- include/linux/security.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 24cd7cf48564..535721cc374a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1426,7 +1426,7 @@ struct security_operations { int (*path_unlink) (struct path *dir, struct dentry *dentry); int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); int (*path_rmdir) (struct path *dir, struct dentry *dentry); - int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode, + int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate) (struct path *path); int (*path_symlink) (struct path *dir, struct dentry *dentry, @@ -2857,7 +2857,7 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi int security_path_unlink(struct path *dir, struct dentry *dentry); int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(struct path *dir, struct dentry *dentry); -int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, +int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(struct path *path); int security_path_symlink(struct path *dir, struct dentry *dentry, @@ -2888,7 +2888,7 @@ static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) } static inline int security_path_mknod(struct path *dir, struct dentry *dentry, - int mode, unsigned int dev) + umode_t mode, unsigned int dev) { return 0; } -- cgit v1.2.3 From a218d0fdc5f9004164ff151d274487f6799907d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Nov 2011 14:59:34 -0500 Subject: switch open and mkdir syscalls to umode_t Signed-off-by: Al Viro --- include/linux/compat.h | 4 ++-- include/linux/fs.h | 4 ++-- include/linux/syscalls.h | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 66ed067fb729..41c9f6515f46 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -422,9 +422,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, - int mode); + umode_t mode); asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, - int flags, int mode); + int flags, umode_t mode); asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d02fab420c6..f0e57b7e4297 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2054,8 +2054,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, extern int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, - int mode); -extern struct file *filp_open(const char *, int, int); + umode_t mode); +extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b25621476316..515669fa3c1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -517,9 +517,9 @@ asmlinkage long sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count); asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz); -asmlinkage long sys_creat(const char __user *pathname, int mode); +asmlinkage long sys_creat(const char __user *pathname, umode_t mode); asmlinkage long sys_open(const char __user *filename, - int flags, int mode); + int flags, umode_t mode); asmlinkage long sys_close(unsigned int fd); asmlinkage long sys_access(const char __user *filename, int mode); asmlinkage long sys_vhangup(void); @@ -582,7 +582,7 @@ asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_mkdir(const char __user *pathname, int mode); +asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_rmdir(const char __user *pathname); @@ -757,7 +757,7 @@ asmlinkage long sys_spu_create(const char __user *name, asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, unsigned dev); -asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode); +asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); asmlinkage long sys_symlinkat(const char __user * oldname, int newdfd, const char __user * newname); @@ -773,7 +773,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user * filename, asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, - int mode); + umode_t mode); asmlinkage long sys_newfstatat(int dfd, const char __user *filename, struct stat __user *statbuf, int flag); asmlinkage long sys_fstatat64(int dfd, const char __user *filename, -- cgit v1.2.3 From 1b8e5564b9d34cbeb3047dd2be8ec9cd5e2785e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Nov 2011 21:01:32 -0500 Subject: vfs: the first spoils - mnt_hash moved taken out of struct vfsmount into struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 00f5c4f2160b..77c913dc7397 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -53,7 +53,6 @@ struct mnt_pcp { }; struct vfsmount { - struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3 From 3376f34fff5be9954fd9a9c4fd68f4a0a36d480e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Nov 2011 22:05:19 -0500 Subject: vfs: mnt_parent moved to struct mount the second victim... Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 77c913dc7397..b69362d2b5b2 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -53,7 +53,6 @@ struct mnt_pcp { }; struct vfsmount { - struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ -- cgit v1.2.3 From a73324da7af4052e1d1ddec6a5980f552420e58b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Nov 2011 22:25:07 -0500 Subject: vfs: move mnt_mountpoint to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index b69362d2b5b2..e3f005993d0f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -53,7 +53,6 @@ struct mnt_pcp { }; struct vfsmount { - struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ #ifdef CONFIG_SMP -- cgit v1.2.3 From 68e8a9feab251f9d3c8fd9e9893c97843bcd4bd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Nov 2011 22:53:09 -0500 Subject: vfs: all counters taken to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index e3f005993d0f..cc01ed1bc719 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,21 +47,9 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 -struct mnt_pcp { - int mnt_count; - int mnt_writers; -}; - struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ -#ifdef CONFIG_SMP - struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longterm; /* how many of the refs are longterm */ -#else - int mnt_count; - int mnt_writers; -#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; -- cgit v1.2.3 From 6b41d536f7c84e7cb1c1462073150277e46f6ea8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Nov 2011 23:24:33 -0500 Subject: vfs: take mnt_child/mnt_mounts to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index cc01ed1bc719..e9990254d4d0 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -50,8 +50,6 @@ struct mnt_namespace; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ - struct list_head mnt_mounts; /* list of children, anchored here */ - struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; /* 4 bytes hole on 64bits arches without fsnotify */ #ifdef CONFIG_FSNOTIFY -- cgit v1.2.3 From d10e8def07fc87488c396d2eff2c26c43bb541dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 00:07:16 -0500 Subject: vfs: take mnt_master to struct mount make IS_MNT_SLAVE take struct mount * at the same time Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index e9990254d4d0..2d5beb5e3a8c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -62,7 +62,6 @@ struct vfsmount { struct list_head mnt_share; /* circular list of shared mounts */ struct list_head mnt_slave_list;/* list of slave mounts */ struct list_head mnt_slave; /* slave list entry */ - struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ -- cgit v1.2.3 From 6776db3d32b2a59198ec7ac6d32be0b9fdbd8a68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 00:22:05 -0500 Subject: vfs: take mnt_share/mnt_slave/mnt_slave_list and mnt_expire to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 2d5beb5e3a8c..2f5f3ae3bd2d 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -58,10 +58,6 @@ struct vfsmount { #endif const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - struct list_head mnt_expire; /* link in fs-specific expiry list */ - struct list_head mnt_share; /* circular list of shared mounts */ - struct list_head mnt_slave_list;/* list of slave mounts */ - struct list_head mnt_slave; /* slave list entry */ struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ -- cgit v1.2.3 From 143c8c91cee7efdd732ec5f61b3471fc46192f20 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 00:46:35 -0500 Subject: vfs: mnt_ns moved to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 2f5f3ae3bd2d..eb8c1f1be90c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -58,7 +58,6 @@ struct vfsmount { #endif const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ -- cgit v1.2.3 From 15169fe784a9846b24cdb0840329d41aebc23249 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 00:50:41 -0500 Subject: vfs: mnt_id/mnt_group_id moved Signed-off-by: Al Viro --- include/linux/mount.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index eb8c1f1be90c..b26dc40bfafc 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -58,8 +58,6 @@ struct vfsmount { #endif const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - int mnt_id; /* mount identifier */ - int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -- cgit v1.2.3 From 863d684f946eb240c7dd57d265d88315950ca5cc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 00:57:42 -0500 Subject: vfs: move the rest of int fields to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index b26dc40bfafc..080e3088ca81 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -58,9 +58,6 @@ struct vfsmount { #endif const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - int mnt_expiry_mark; /* true if marked for expiry */ - int mnt_pinned; - int mnt_ghosts; }; struct file; /* forward dec */ -- cgit v1.2.3 From 1a4eeaf2a8c07404e2d1c3ff99b393fd4c207170 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 02:19:55 -0500 Subject: vfs: move mnt_list to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 080e3088ca81..16ae3d46b30a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -57,7 +57,6 @@ struct vfsmount { struct hlist_head mnt_fsnotify_marks; #endif const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ - struct list_head mnt_list; }; struct file; /* forward dec */ -- cgit v1.2.3 From 52ba1621de1479ce7e52b6d167860462e483313c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 02:25:17 -0500 Subject: vfs: move mnt_devname Signed-off-by: Al Viro --- include/linux/mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 16ae3d46b30a..f18dd1bfcbda 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -56,7 +56,6 @@ struct vfsmount { __u32 mnt_fsnotify_mask; struct hlist_head mnt_fsnotify_marks; #endif - const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ }; struct file; /* forward dec */ -- cgit v1.2.3 From c63181e6b6df89176b3984c6977bb5ec03d0df23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Nov 2011 02:35:16 -0500 Subject: vfs: move fsnotify junk to struct mount Signed-off-by: Al Viro --- include/linux/mount.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index f18dd1bfcbda..d7029f4a191a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -51,11 +51,6 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; - /* 4 bytes hole on 64bits arches without fsnotify */ -#ifdef CONFIG_FSNOTIFY - __u32 mnt_fsnotify_mask; - struct hlist_head mnt_fsnotify_marks; -#endif }; struct file; /* forward dec */ -- cgit v1.2.3 From 0226f4923f6c9b40cfa1c1c1b19a6ac6b3924ead Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 6 Dec 2011 12:21:54 -0500 Subject: vfs: take /proc/*/mounts and friends to fs/proc_namespace.c rationale: that stuff is far tighter bound to fs/namespace.c than to the guts of procfs proper. Signed-off-by: Al Viro --- include/linux/mnt_namespace.h | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index e87ec01aac9d..5a8e3903d770 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -2,38 +2,16 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ -#include -#include -#include - -struct mnt_namespace { - atomic_t count; - struct vfsmount * root; - struct list_head list; - wait_queue_head_t poll; - int event; -}; - -struct proc_mounts { - struct seq_file m; /* must be the first element */ - struct mnt_namespace *ns; - struct path root; -}; - +struct mnt_namespace; struct fs_struct; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -static inline void get_mnt_ns(struct mnt_namespace *ns) -{ - atomic_inc(&ns->count); -} -extern const struct seq_operations mounts_op; -extern const struct seq_operations mountinfo_op; -extern const struct seq_operations mountstats_op; -extern int mnt_had_events(struct proc_mounts *); +extern const struct file_operations proc_mounts_operations; +extern const struct file_operations proc_mountinfo_operations; +extern const struct file_operations proc_mountstats_operations; #endif #endif -- cgit v1.2.3 From 55664f324c2a1a6386dc88492c5c94aa3d336b93 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Jan 2012 12:04:51 +0000 Subject: ethtool: Allow drivers to select RX NFC rule locations Define special location values for RX NFC that request the driver to select the actual rule location. This allows for implementation on devices that use hash-based filter lookup, whereas currently the API is more suited to devices with TCAM lookup or linear search. In ethtool_set_rxnfc() and the compat wrapper ethtool_ioctl(), copy the structure back to user-space after insertion so that the actual location is returned. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b38bf69310ee..d901714120a3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -489,7 +489,10 @@ struct ethtool_rx_flow_spec { * on return. * * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined - * rules on return. + * rules on return. If @data is non-zero on return then it is the + * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the + * driver supports any special location values. If that flag is not + * set in @data then special location values should not be used. * * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an * existing rule on entry and @fs contains the rule on return. @@ -501,10 +504,23 @@ struct ethtool_rx_flow_spec { * must use the second parameter to get_rxnfc() instead of @rule_locs. * * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update. - * @fs.@location specifies the location to use and must not be ignored. + * @fs.@location either specifies the location to use or is a special + * location value with %RX_CLS_LOC_SPECIAL flag set. On return, + * @fs.@location is the actual rule location. * * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an * existing rule on entry. + * + * A driver supporting the special location values for + * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused + * location, and may remove a rule at a later location (lower + * priority) that matches exactly the same set of flows. The special + * values are: %RX_CLS_LOC_ANY, selecting any location; + * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum + * priority); and %RX_CLS_LOC_LAST, selecting the last suitable + * location (minimum priority). Additional special values may be + * defined in future and drivers must return -%EINVAL for any + * unrecognised value. */ struct ethtool_rxnfc { __u32 cmd; @@ -1141,6 +1157,12 @@ struct ethtool_ops { #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +/* Special RX classification rule insert location values */ +#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */ +#define RX_CLS_LOC_ANY 0xffffffff +#define RX_CLS_LOC_FIRST 0xfffffffe +#define RX_CLS_LOC_LAST 0xfffffffd + /* Reset flags */ /* The reset() operation must clear the flags for the components which * were actually reset. On successful return, the flags indicate the -- cgit v1.2.3 From 6cfb5e759d47f037cbd0953ec2c3ceb220ed9e96 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Jan 2012 12:07:59 +0000 Subject: ethtool: Remove ethtool_ops::set_rx_ntuple operation All implementations have been converted to implement set_rxnfc instead. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d901714120a3..da5b2de99ae4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -859,8 +859,6 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. - * @set_rx_ntuple: Set an RX n-tuple rule. Returns a negative error code - * or zero. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. @@ -929,8 +927,6 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); - int (*set_rx_ntuple)(struct net_device *, - struct ethtool_rx_ntuple *); u32 (*get_rxfh_indir_size)(struct net_device *); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); -- cgit v1.2.3 From 288e0713f469c03dbc412153b5341d6dfc2c9907 Mon Sep 17 00:00:00 2001 From: Ilan Elias Date: Thu, 22 Dec 2011 11:51:54 +0200 Subject: NFC: Export a new attribute nfcid1 in target info The nfcid1 is the NFC-A identifier. It is exported as an attribute of the target info (returned as a response to NFC_CMD_GET_TARGET). Signed-off-by: Ilan Elias Acked-by: Samuel Ortiz Signed-off-by: John W. Linville --- include/linux/nfc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 89fee4ab1904..01d4e5d60325 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -88,6 +88,7 @@ enum nfc_commands { * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the * target is not NFC-Forum compliant) + * @NFC_ATTR_TARGET_NFCID1: NFC-A targets identifier, max 10 bytes * @NFC_ATTR_COMM_MODE: Passive or active mode * @NFC_ATTR_RF_MODE: Initiator or target */ @@ -99,6 +100,7 @@ enum nfc_attrs { NFC_ATTR_TARGET_INDEX, NFC_ATTR_TARGET_SENS_RES, NFC_ATTR_TARGET_SEL_RES, + NFC_ATTR_TARGET_NFCID1, NFC_ATTR_COMM_MODE, NFC_ATTR_RF_MODE, /* private: internal use only */ -- cgit v1.2.3 From b9d4e714a86a4e88c2f530c76597f7025e5851d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 4 Jan 2012 15:05:10 -0800 Subject: driver core: remove __must_check from device_create_file With the conversion of the sysdev to a real struct device, more drivers are calling device_create_file, and some of them don't check the return value, which isn't wise. But as they happen to be in parts of the kernel where a warning is considered an error (i.e. powerpc), this breaks the build. So for now, remove the marking on the function, which fixes the build problems. Reported-by: Stephen Rothwell Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7f9fc1505e94..acf505e4fe94 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -510,8 +510,8 @@ ssize_t device_store_int(struct device *dev, struct device_attribute *attr, struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } -extern int __must_check device_create_file(struct device *device, - const struct device_attribute *entry); +extern int device_create_file(struct device *device, + const struct device_attribute *entry); extern void device_remove_file(struct device *dev, const struct device_attribute *attr); extern int __must_check device_create_bin_file(struct device *dev, -- cgit v1.2.3 From 68bad94ed801d955535cb50dde3412944a24530c Mon Sep 17 00:00:00 2001 From: Neerav Parikh Date: Wed, 4 Jan 2012 20:23:39 +0000 Subject: netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call This adds a new ndo_get_fcoe_hbainfo() call in net_device_ops for FCoE protocol stack. If supported by the underlying device, the FCoE protocol stack will call this to get device specific information from the underlying device. This information will then be utilized by the FCoE protocol stack to register Fiber Channel HBA attributes with the Fiber Channel Management Service via Fabric Device Management Interface (FDMI) as per the T11 FC-GS specification. Changes in v2: - As per comments from David Miller aligning the parameters of the ndo_get_fcoe_hbainfo() Signed-off-by: Neerav Parikh Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a776a675c0e5..a1d109590da4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -707,6 +707,23 @@ struct netdev_tc_txq { u16 offset; }; +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +/* + * This structure is to hold information about the device + * configured to run FCoE protocol stack. + */ +struct netdev_fcoe_hbainfo { + char manufacturer[64]; + char serial_number[64]; + char hardware_version[64]; + char driver_version[64]; + char optionrom_version[64]; + char firmware_version[64]; + char model[256]; + char model_description[256]; +}; +#endif + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -847,6 +864,13 @@ struct netdev_tc_txq { * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * + * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, + * struct netdev_fcoe_hbainfo *hbainfo); + * Called when the FCoE Protocol stack wants information on the underlying + * device. This information is utilized by the FCoE protocol stack to + * register attributes with Fiber Channel management service as per the + * FC-GS Fabric Device Management Information(FDMI) specification. + * * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); * Called when the underlying device wants to override default World Wide * Name (WWN) generation mechanism in FCoE protocol stack to pass its own @@ -950,6 +974,8 @@ struct net_device_ops { u16 xid, struct scatterlist *sgl, unsigned int sgc); + int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, + struct netdev_fcoe_hbainfo *hbainfo); #endif #if IS_ENABLED(CONFIG_LIBFCOE) -- cgit v1.2.3 From 18cb809850fb499ad9bf288696a95f4071f73931 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Jan 2012 14:18:38 +0000 Subject: net_sched: sfq: extend limits SFQ as implemented in Linux is very limited, with at most 127 flows and limit of 127 packets. [ So if 127 flows are active, we have one packet per flow ] This patch brings to SFQ following features to cope with modern needs. - Ability to specify a smaller per flow limit of inflight packets. (default value being at 127 packets) - Ability to have up to 65408 active flows (instead of 127) - Ability to have head drops instead of tail drops (to drop old packets from a flow) Example of use : No more than 20 packets per flow, max 8000 flows, max 20000 packets in SFQ qdisc, hash table of 65536 slots. tc qdisc add ... sfq \ flows 8000 \ depth 20 \ headdrop \ limit 20000 \ divisor 65536 Ram usage : 2 bytes per hash table entry (instead of previous 1 byte/entry) 32 bytes per flow on 64bit arches, instead of 384 for QFQ, so much better cache hit ratio. Signed-off-by: Eric Dumazet CC: Dave Taht Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 8daced32a014..8f1b928f777c 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -162,19 +162,17 @@ struct tc_sfq_qopt { unsigned flows; /* Maximal number of flows */ }; +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +}; + + struct tc_sfq_xstats { __s32 allot; }; -/* - * NOTE: limit, divisor and flows are hardwired to code at the moment. - * - * limit=flows=128, divisor=1024; - * - * The only reason for this is efficiency, it is possible - * to change these parameters in compile time. - */ - /* RED section */ enum { -- cgit v1.2.3 From 9f42f126154786e6e76df513004800c8c633f020 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 5 Jan 2012 07:13:39 +0000 Subject: net: pack skb_shared_info more efficiently nr_frags can be 8 bits since 256 is plenty of fragments. This allows it to be packed with tx_flags. Also by moving ip6_frag_id and dataref (both 4 bytes) next to each other we can avoid a hole between ip6_frag_id and frag_list on 64 bit systems. Signed-off-by: Ian Campbell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f47f0c3939f2..50db9b04a552 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -242,15 +242,15 @@ struct ubuf_info { * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - unsigned short nr_frags; + unsigned char nr_frags; + __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; unsigned short gso_type; - __be32 ip6_frag_id; - __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + __be32 ip6_frag_id; /* * Warning : all fields before dataref are cleared in __alloc_skb() -- cgit v1.2.3 From d15bd7ee445d0702ad801fdaece348fdb79e6581 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Mon, 26 Dec 2011 14:53:15 +0530 Subject: dma-buf: Introduce dma buffer sharing mechanism This is the first step in defining a dma buffer sharing mechanism. A new buffer object dma_buf is added, with operations and API to allow easy sharing of this buffer object across devices. The framework allows: - creation of a buffer object, its association with a file pointer, and associated allocator-defined operations on that buffer. This operation is called the 'export' operation. - different devices to 'attach' themselves to this exported buffer object, to facilitate backing storage negotiation, using dma_buf_attach() API. - the exported buffer object to be shared with the other entity by asking for its 'file-descriptor (fd)', and sharing the fd across. - a received fd to get the buffer object back, where it can be accessed using the associated exporter-defined operations. - the exporter and user to share the scatterlist associated with this buffer object using map_dma_buf and unmap_dma_buf operations. Atleast one 'attach()' call is required to be made prior to calling the map_dma_buf() operation. Couple of building blocks in map_dma_buf() are added to ease introduction of sync'ing across exporter and users, and late allocation by the exporter. For this first version, this framework will work with certain conditions: - *ONLY* exporter will be allowed to mmap to userspace (outside of this framework - mmap is not a buffer object operation), - currently, *ONLY* users that do not need CPU access to the buffer are allowed. More details are there in the documentation patch. This is based on design suggestions from many people at the mini-summits[1], most notably from Arnd Bergmann , Rob Clark and Daniel Vetter . The implementation is inspired from proof-of-concept patch-set from Tomasz Stanislawski , who demonstrated buffer sharing between two v4l2 devices. [2] [1]: https://wiki.linaro.org/OfficeofCTO/MemoryManagement [2]: http://lwn.net/Articles/454389 Signed-off-by: Sumit Semwal Signed-off-by: Sumit Semwal Reviewed-by: Daniel Vetter Reviewed-by: Dave Airlie Reviewed-and-Tested-by: Rob Clark Signed-off-by: Dave Airlie --- include/linux/dma-buf.h | 176 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 include/linux/dma-buf.h (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h new file mode 100644 index 000000000000..f8ac076afa52 --- /dev/null +++ b/include/linux/dma-buf.h @@ -0,0 +1,176 @@ +/* + * Header file for dma buffer sharing framework. + * + * Copyright(C) 2011 Linaro Limited. All rights reserved. + * Author: Sumit Semwal + * + * Many thanks to linaro-mm-sig list, and specially + * Arnd Bergmann , Rob Clark and + * Daniel Vetter for their support in creation and + * refining of this idea. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#ifndef __DMA_BUF_H__ +#define __DMA_BUF_H__ + +#include +#include +#include +#include +#include +#include + +struct dma_buf; +struct dma_buf_attachment; + +/** + * struct dma_buf_ops - operations possible on struct dma_buf + * @attach: [optional] allows different devices to 'attach' themselves to the + * given buffer. It might return -EBUSY to signal that backing storage + * is already allocated and incompatible with the requirements + * of requesting device. + * @detach: [optional] detach a given device from this buffer. + * @map_dma_buf: returns list of scatter pages allocated, increases usecount + * of the buffer. Requires atleast one attach to be called + * before. Returned sg list should already be mapped into + * _device_ address space. This call may sleep. May also return + * -EINTR. Should return -EINVAL if attach hasn't been called yet. + * @unmap_dma_buf: decreases usecount of buffer, might deallocate scatter + * pages. + * @release: release this buffer; to be called after the last dma_buf_put. + */ +struct dma_buf_ops { + int (*attach)(struct dma_buf *, struct device *, + struct dma_buf_attachment *); + + void (*detach)(struct dma_buf *, struct dma_buf_attachment *); + + /* For {map,unmap}_dma_buf below, any specific buffer attributes + * required should get added to device_dma_parameters accessible + * via dev->dma_params. + */ + struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *, + enum dma_data_direction); + void (*unmap_dma_buf)(struct dma_buf_attachment *, + struct sg_table *); + /* TODO: Add try_map_dma_buf version, to return immed with -EBUSY + * if the call would block. + */ + + /* after final dma_buf_put() */ + void (*release)(struct dma_buf *); + +}; + +/** + * struct dma_buf - shared buffer object + * @size: size of the buffer + * @file: file pointer used for sharing buffers across, and for refcounting. + * @attachments: list of dma_buf_attachment that denotes all devices attached. + * @ops: dma_buf_ops associated with this buffer object. + * @priv: exporter specific private data for this buffer object. + */ +struct dma_buf { + size_t size; + struct file *file; + struct list_head attachments; + const struct dma_buf_ops *ops; + /* mutex to serialize list manipulation and other ops */ + struct mutex lock; + void *priv; +}; + +/** + * struct dma_buf_attachment - holds device-buffer attachment data + * @dmabuf: buffer for this attachment. + * @dev: device attached to the buffer. + * @node: list of dma_buf_attachment. + * @priv: exporter specific attachment data. + * + * This structure holds the attachment information between the dma_buf buffer + * and its user device(s). The list contains one attachment struct per device + * attached to the buffer. + */ +struct dma_buf_attachment { + struct dma_buf *dmabuf; + struct device *dev; + struct list_head node; + void *priv; +}; + +#ifdef CONFIG_DMA_SHARED_BUFFER +struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, + struct device *dev); +void dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *dmabuf_attach); +struct dma_buf *dma_buf_export(void *priv, struct dma_buf_ops *ops, + size_t size, int flags); +int dma_buf_fd(struct dma_buf *dmabuf); +struct dma_buf *dma_buf_get(int fd); +void dma_buf_put(struct dma_buf *dmabuf); + +struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, + enum dma_data_direction); +void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *); +#else + +static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, + struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + +static inline void dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *dmabuf_attach) +{ + return; +} + +static inline struct dma_buf *dma_buf_export(void *priv, + struct dma_buf_ops *ops, + size_t size, int flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline int dma_buf_fd(struct dma_buf *dmabuf) +{ + return -ENODEV; +} + +static inline struct dma_buf *dma_buf_get(int fd) +{ + return ERR_PTR(-ENODEV); +} + +static inline void dma_buf_put(struct dma_buf *dmabuf) +{ + return; +} + +static inline struct sg_table *dma_buf_map_attachment( + struct dma_buf_attachment *attach, enum dma_data_direction write) +{ + return ERR_PTR(-ENODEV); +} + +static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, + struct sg_table *sg) +{ + return; +} + +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From cdcf116d44e78c7216ba9f8be9af1cdfca7af728 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 10:51:53 -0500 Subject: switch security_path_chmod() to struct path * Signed-off-by: Al Viro --- include/linux/security.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 535721cc374a..4298d2dbafa3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1435,8 +1435,7 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, - umode_t mode); + int (*path_chmod) (struct path *path, umode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); int (*path_chroot) (struct path *path); #endif @@ -2866,8 +2865,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); -int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, - umode_t mode); +int security_path_chmod(struct path *path, umode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ @@ -2919,9 +2917,7 @@ static inline int security_path_rename(struct path *old_dir, return 0; } -static inline int security_path_chmod(struct dentry *dentry, - struct vfsmount *mnt, - umode_t mode) +static inline int security_path_chmod(struct path *path, umode_t mode) { return 0; } -- cgit v1.2.3 From 64132379d509184425672e0dce1ac0a031e3f2a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 20:51:13 -0500 Subject: vfs: switch ->show_stats to struct dentry * Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 659be7d82617..b2e4b6f639e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1675,7 +1675,7 @@ struct super_operations { int (*show_options)(struct seq_file *, struct vfsmount *); int (*show_devname)(struct seq_file *, struct vfsmount *); int (*show_path)(struct seq_file *, struct vfsmount *); - int (*show_stats)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); -- cgit v1.2.3 From d861c630e99febe5ce6055290085556c5b714b06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 21:32:45 -0500 Subject: vfs: switch ->show_devname() to struct dentry * Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b2e4b6f639e4..a8dff43d1b9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1673,7 +1673,7 @@ struct super_operations { void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); - int (*show_devname)(struct seq_file *, struct vfsmount *); + int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct vfsmount *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA -- cgit v1.2.3 From a6322de67b58a00e3a783ad9c87c2a11b2d67b47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 21:37:57 -0500 Subject: vfs: switch ->show_path() to struct dentry * Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a8dff43d1b9d..13721b073407 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1674,7 +1674,7 @@ struct super_operations { int (*show_options)(struct seq_file *, struct vfsmount *); int (*show_devname)(struct seq_file *, struct dentry *); - int (*show_path)(struct seq_file *, struct vfsmount *); + int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); -- cgit v1.2.3 From 34c80b1d93e6e20ca9dea0baf583a5b5510d92d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 21:32:45 -0500 Subject: vfs: switch ->show_options() to struct dentry * Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 13721b073407..cc1021fd19ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1672,7 +1672,7 @@ struct super_operations { int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); - int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); @@ -2592,7 +2592,7 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern void file_update_time(struct file *file); -extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); +extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); extern void replace_mount_options(struct super_block *sb, char *options); -- cgit v1.2.3 From 39f7c4db1d2d9e2e2a90abdf34811783089d217d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:30 +0100 Subject: vfs: keep list of mounts for each superblock Keep track of vfsmounts belonging to a superblock. List is protected by vfsmount_lock. Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cc1021fd19ef..03385acd71e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1428,6 +1428,7 @@ struct super_block { #else struct list_head s_files; #endif + struct list_head s_mounts; /* list of mounts; _not_ for fs use */ /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- cgit v1.2.3 From 4ed5e82fe77f4147cf386327c9a63a2dd7eff518 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:31 +0100 Subject: vfs: protect remounting superblock read-only Currently remouting superblock read-only is racy in a major way. With the per mount read-only infrastructure it is now possible to prevent most races, which this patch attempts. Before starting the remount read-only, iterate through all mounts belonging to the superblock and if none of them have any pending writes, set sb->s_readonly_remount. This indicates that remount is in progress and no further write requests are allowed. If the remount succeeds set MS_RDONLY and reset s_readonly_remount. If the remounting is unsuccessful just reset s_readonly_remount. This can result in transient EROFS errors, despite the fact the remount failed. Unfortunately hodling off writes is difficult as remount itself may touch the filesystem (e.g. through load_nls()) which would deadlock. A later patch deals with delayed writes due to nlink going to zero. Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 03385acd71e8..7b8a681b1ef4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1482,6 +1482,9 @@ struct super_block { int cleancache_poolid; struct shrinker s_shrink; /* per-sb shrinker handle */ + + /* Being remounted read-only */ + int s_readonly_remount; }; /* superblock cache pruning functions */ -- cgit v1.2.3 From 7ada4db88634429f4da690ad1c4eb73c93085f0c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:32 +0100 Subject: vfs: count unlinked inodes Add a new counter to the superblock that keeps track of unlinked but not yet deleted inodes. Do not WARN_ON if set_nlink is called with zero count, just do a ratelimited printk. This happens on xfs and probably other filesystems after an unclean shutdown when the filesystem reads inodes which already have zero i_nlink. Reported by Christoph Hellwig. Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- include/linux/fs.h | 61 +++++++----------------------------------------------- 1 file changed, 7 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b8a681b1ef4..8ac40921f5ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1483,6 +1483,9 @@ struct super_block { struct shrinker s_shrink; /* per-sb shrinker handle */ + /* Number of inodes with nlink == 0 but still referenced */ + atomic_long_t s_remove_count; + /* Being remounted read-only */ int s_readonly_remount; }; @@ -1768,31 +1771,10 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -/** - * set_nlink - directly set an inode's link count - * @inode: inode - * @nlink: new nlink (should be non-zero) - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. - */ -static inline void set_nlink(struct inode *inode, unsigned int nlink) -{ - inode->__i_nlink = nlink; -} - -/** - * inc_nlink - directly increment an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. Currently, - * it is only here for parity with dec_nlink(). - */ -static inline void inc_nlink(struct inode *inode) -{ - inode->__i_nlink++; -} +extern void inc_nlink(struct inode *inode); +extern void drop_nlink(struct inode *inode); +extern void clear_nlink(struct inode *inode); +extern void set_nlink(struct inode *inode, unsigned int nlink); static inline void inode_inc_link_count(struct inode *inode) { @@ -1800,35 +1782,6 @@ static inline void inode_inc_link_count(struct inode *inode) mark_inode_dirty(inode); } -/** - * drop_nlink - directly drop an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. In cases - * where we are attempting to track writes to the - * filesystem, a decrement to zero means an imminent - * write when the file is truncated and actually unlinked - * on the filesystem. - */ -static inline void drop_nlink(struct inode *inode) -{ - inode->__i_nlink--; -} - -/** - * clear_nlink - directly zero an inode's link count - * @inode: inode - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. See - * drop_nlink() for why we care about i_nlink hitting zero. - */ -static inline void clear_nlink(struct inode *inode) -{ - inode->__i_nlink = 0; -} - static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); -- cgit v1.2.3 From 8e8b87964bc8dc5c14b6543fc933b7725f07d3ac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:33 +0100 Subject: vfs: prevent remount read-only if pending removes If there are any inodes on the super block that have been unlinked (i_nlink == 0) but have not yet been deleted then prevent the remounting the super block read-only. Reported-by: Toshiyuki Okajima Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ac40921f5ac..7aacf31418fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2150,8 +2150,6 @@ extern const struct file_operations read_pipefifo_fops; extern const struct file_operations write_pipefifo_fops; extern const struct file_operations rdwr_pipefifo_fops; -extern int fs_may_remount_ro(struct super_block *); - #ifdef CONFIG_BLOCK /* * return READ, READA, or WRITE -- cgit v1.2.3 From c3aa077648e147783a7a53b409578234647db853 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2011 20:17:10 +0100 Subject: reiserfs: Properly display mount options in /proc/mounts Make reiserfs properly display mount options in /proc/mounts. CC: reiserfs-devel@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/reiserfs_fs.h | 7 ++++--- include/linux/reiserfs_fs_sb.h | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 26be28fd7b76..2213ddcce20c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1759,13 +1759,14 @@ struct reiserfs_journal_header { REISERFS_QUOTA_TRANS_BLOCKS(sb))) #ifdef CONFIG_QUOTA +#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) /* We need to update data and inode (atime) */ -#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) /* 1 balancing, 1 bitmap, 1 data per write + stat data update */ -#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) /* same as with INIT */ -#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) #else #define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 52c83b6a758a..8c9e85c64b46 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -417,6 +417,7 @@ struct reiserfs_sb_info { char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; #endif + char *s_jdev; /* Stored jdev for mount option showing */ #ifdef CONFIG_REISERFS_CHECK struct tree_balance *cur_tb; /* @@ -482,7 +483,8 @@ enum reiserfs_mount_options { REISERFS_ERROR_RO, REISERFS_ERROR_CONTINUE, - REISERFS_QUOTA, /* Some quota option specified */ + REISERFS_USRQUOTA, /* User quota option specified */ + REISERFS_GRPQUOTA, /* Group quota option specified */ REISERFS_TEST1, REISERFS_TEST2, -- cgit v1.2.3