From 2f1b6248682f8b39ca3c7e549dfc216d26c4109b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:13 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: use enum to define zones, reformat and comment Use enum for zones and reformat zones dependent information Add comments explaning the use of zones and add a zones_t type for zone numbers. Line up information that will be #ifdefd by the following patches. [akpm@osdl.org: comment cleanups] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 66 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 19 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f45163c528e8..03a5a6eb0ffa 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -88,14 +88,53 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif -#define ZONE_DMA 0 -#define ZONE_DMA32 1 -#define ZONE_NORMAL 2 -#define ZONE_HIGHMEM 3 +enum zone_type { + /* + * ZONE_DMA is used when there are devices that are not able + * to do DMA to all of addressable memory (ZONE_NORMAL). Then we + * carve out the portion of memory that is needed for these devices. + * The range is arch specific. + * + * Some examples + * + * Architecture Limit + * --------------------------- + * parisc, ia64, sparc <4G + * s390 <2G + * arm26 <48M + * arm Various + * alpha Unlimited or 0-16MB. + * + * i386, x86_64 and multiple other arches + * <16M. + */ + ZONE_DMA, + /* + * x86_64 needs two ZONE_DMAs because it supports devices that are + * only able to do DMA to the lower 16M but also 32 bit devices that + * can only do DMA areas below 4G. + */ + ZONE_DMA32, + /* + * Normal addressable memory is in ZONE_NORMAL. DMA operations can be + * performed on pages in ZONE_NORMAL if the DMA devices support + * transfers to all addressable memory. + */ + ZONE_NORMAL, + /* + * A memory area that is only addressable by the kernel through + * mapping portions into its own address space. This is for example + * used by i386 to allow the kernel to address the memory beyond + * 900MB. The kernel will set up special mappings (page + * table entries on i386) for each page that the kernel needs to + * access. + */ + ZONE_HIGHMEM, -#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ + MAX_NR_ZONES +}; +#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ /* * When a memory allocation must conform to specific limitations (such @@ -126,16 +165,6 @@ struct per_cpu_pageset { /* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ #define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a 32bit PC we have 4 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_DMA32 0 MB Empty - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ - struct zone { /* Fields commonly accessed by the page allocator */ unsigned long free_pages; @@ -266,7 +295,6 @@ struct zone { char *name; } ____cacheline_internodealigned_in_smp; - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -373,12 +401,12 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } -static inline int is_highmem_idx(int idx) +static inline int is_highmem_idx(enum zone_type idx) { return (idx == ZONE_HIGHMEM); } -static inline int is_normal_idx(int idx) +static inline int is_normal_idx(enum zone_type idx) { return (idx == ZONE_NORMAL); } -- cgit v1.2.3 From fb0e7942bdcbbd2f90e61cb4cfa4fa892a873f8a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:13 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_DMA32 optional Make ZONE_DMA32 optional - Add #ifdefs around ZONE_DMA32 specific code and definitions. - Add CONFIG_ZONE_DMA32 config option and use that for x86_64 that alone needs this zone. - Remove the use of CONFIG_DMA_IS_DMA32 and CONFIG_DMA_IS_NORMAL for ia64 and fix up the way per node ZVCs are calculated. - Fall back to prior GFP_ZONEMASK of 0x03 if there is no DMA32 zone. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 03a5a6eb0ffa..adae3c915938 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -109,12 +109,14 @@ enum zone_type { * <16M. */ ZONE_DMA, +#ifdef CONFIG_ZONE_DMA32 /* * x86_64 needs two ZONE_DMAs because it supports devices that are * only able to do DMA to the lower 16M but also 32 bit devices that * can only do DMA areas below 4G. */ ZONE_DMA32, +#endif /* * Normal addressable memory is in ZONE_NORMAL. DMA operations can be * performed on pages in ZONE_NORMAL if the DMA devices support @@ -161,9 +163,13 @@ enum zone_type { * * NOTE! Make sure this matches the zones in */ -#define GFP_ZONEMASK 0x07 -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ +#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ + +#ifdef CONFIG_ZONE_DMA32 +#define GFP_ZONEMASK 0x07 +#else +#define GFP_ZONEMASK 0x03 +#endif struct zone { /* Fields commonly accessed by the page allocator */ @@ -429,7 +435,11 @@ static inline int is_normal(struct zone *zone) static inline int is_dma32(struct zone *zone) { +#ifdef CONFIG_ZONE_DMA32 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +#else + return 0; +#endif } static inline int is_dma(struct zone *zone) -- cgit v1.2.3 From e53ef38d05dd59ed281a35590e4a5b64d8ff4c52 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:14 -0700 Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_HIGHMEM optional Make ZONE_HIGHMEM optional - ifdef out code and definitions related to CONFIG_HIGHMEM - __GFP_HIGHMEM falls back to normal allocations if there is no ZONE_HIGHMEM - GFP_ZONEMASK becomes 0x01 if there is no DMA32 and no HIGHMEM zone. [jdike@addtoit.com: build fix] Signed-off-by: Jeff Dike Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index adae3c915938..76d33e688593 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -123,6 +123,7 @@ enum zone_type { * transfers to all addressable memory. */ ZONE_NORMAL, +#ifdef CONFIG_HIGHMEM /* * A memory area that is only addressable by the kernel through * mapping portions into its own address space. This is for example @@ -132,11 +133,10 @@ enum zone_type { * access. */ ZONE_HIGHMEM, - +#endif MAX_NR_ZONES }; -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ /* * When a memory allocation must conform to specific limitations (such @@ -163,12 +163,34 @@ enum zone_type { * * NOTE! Make sure this matches the zones in */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ #ifdef CONFIG_ZONE_DMA32 + +#ifdef CONFIG_HIGHMEM +#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ #define GFP_ZONEMASK 0x07 +#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ #else +#define GFP_ZONETYPES ((0x07 + 1) / 2 + 1) /* Loner */ +/* Mask __GFP_HIGHMEM */ +#define GFP_ZONEMASK 0x05 +#define ZONES_SHIFT 2 +#endif + +#else +#ifdef CONFIG_HIGHMEM + #define GFP_ZONEMASK 0x03 +#define ZONES_SHIFT 2 +#define GFP_ZONETYPES 3 + +#else + +#define GFP_ZONEMASK 0x01 +#define ZONES_SHIFT 1 +#define GFP_ZONETYPES 2 + +#endif #endif struct zone { @@ -409,7 +431,11 @@ static inline int populated_zone(struct zone *zone) static inline int is_highmem_idx(enum zone_type idx) { +#ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM); +#else + return 0; +#endif } static inline int is_normal_idx(enum zone_type idx) @@ -425,7 +451,11 @@ static inline int is_normal_idx(enum zone_type idx) */ static inline int is_highmem(struct zone *zone) { +#ifdef CONFIG_HIGHMEM return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; +#else + return 0; +#endif } static inline int is_normal(struct zone *zone) -- cgit v1.2.3 From 19655d3487001d7df0e10e9cbfc27c758b77c2b5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:19 -0700 Subject: [PATCH] linearly index zone->node_zonelists[] I wonder why we need this bitmask indexing into zone->node_zonelists[]? We always start with the highest zone and then include all lower zones if we build zonelists. Are there really cases where we need allocation from ZONE_DMA or ZONE_HIGHMEM but not ZONE_NORMAL? It seems that the current implementation of highest_zone() makes that already impossible. If we go linear on the index then gfp_zone() == highest_zone() and a lot of definitions fall by the wayside. We can now revert back to the use of gfp_zone() in mempolicy.c ;-) Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 52 +++++--------------------------------------------- 1 file changed, 5 insertions(+), 47 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 76d33e688593..7fe317164b73 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -137,60 +137,18 @@ enum zone_type { MAX_NR_ZONES }; - /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - * - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form for GFP_ZONETYPES when the left most bit is not - * a "loner", otherwise use the second. - * - * NOTE! Make sure this matches the zones in + * match the requested limits. See gfp_zone() in include/linux/gfp.h */ -#ifdef CONFIG_ZONE_DMA32 - -#ifdef CONFIG_HIGHMEM -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -#define GFP_ZONEMASK 0x07 -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ -#else -#define GFP_ZONETYPES ((0x07 + 1) / 2 + 1) /* Loner */ -/* Mask __GFP_HIGHMEM */ -#define GFP_ZONEMASK 0x05 -#define ZONES_SHIFT 2 -#endif - -#else -#ifdef CONFIG_HIGHMEM - -#define GFP_ZONEMASK 0x03 -#define ZONES_SHIFT 2 -#define GFP_ZONETYPES 3 - +#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM) +#define ZONES_SHIFT 1 #else - -#define GFP_ZONEMASK 0x01 -#define ZONES_SHIFT 1 -#define GFP_ZONETYPES 2 - -#endif +#define ZONES_SHIFT 2 #endif struct zone { @@ -360,7 +318,7 @@ struct zonelist { struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; + struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; -- cgit v1.2.3 From 8417bba4b151346ed475fcc923693c9e3be89063 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:51 -0700 Subject: [PATCH] Replace min_unmapped_ratio by min_unmapped_pages in struct zone *_pages is a better description of the role of the variable. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7fe317164b73..a703527e2b45 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -169,7 +169,7 @@ struct zone { /* * zone reclaim becomes active if more unmapped pages exist. */ - unsigned long min_unmapped_ratio; + unsigned long min_unmapped_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; -- cgit v1.2.3 From 972d1a7b140569084439a81265a0f15b74e924e0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:51 -0700 Subject: [PATCH] ZVC: Support NR_SLAB_RECLAIMABLE / NR_SLAB_UNRECLAIMABLE Remove the atomic counter for slab_reclaim_pages and replace the counter and NR_SLAB with two ZVC counter that account for unreclaimable and reclaimable slab pages: NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE. Change the check in vmscan.c to refer to to NR_SLAB_RECLAIMABLE. The intend seems to be to check for slab pages that could be freed. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a703527e2b45..08c41b9f92e0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -51,7 +51,8 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, - NR_SLAB, /* Pages used by slab allocator */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_FILE_DIRTY, NR_WRITEBACK, -- cgit v1.2.3 From 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:52 -0700 Subject: [PATCH] zone_reclaim: dynamic slab reclaim Currently one can enable slab reclaim by setting an explicit option in /proc/sys/vm/zone_reclaim_mode. Slab reclaim is then used as a final option if the freeing of unmapped file backed pages is not enough to free enough pages to allow a local allocation. However, that means that the slab can grow excessively and that most memory of a node may be used by slabs. We have had a case where a machine with 46GB of memory was using 40-42GB for slab. Zone reclaim was effective in dealing with pagecache pages. However, slab reclaim was only done during global reclaim (which is a bit rare on NUMA systems). This patch implements slab reclaim during zone reclaim. Zone reclaim occurs if there is a danger of an off node allocation. At that point we 1. Shrink the per node page cache if the number of pagecache pages is more than min_unmapped_ratio percent of pages in a zone. 2. Shrink the slab cache if the number of the nodes reclaimable slab pages (patch depends on earlier one that implements that counter) are more than min_slab_ratio (a new /proc/sys/vm tunable). The shrinking of the slab cache is a bit problematic since it is not node specific. So we simply calculate what point in the slab we want to reach (current per node slab use minus the number of pages that neeed to be allocated) and then repeately run the global reclaim until that is unsuccessful or we have reached the limit. I hope we will have zone based slab reclaim at some point which will make that easier. The default for the min_slab_ratio is 5% Also remove the slab option from /proc/sys/vm/zone_reclaim_mode. [akpm@osdl.org: cleanups] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 08c41b9f92e0..3693f1a52788 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -171,6 +171,7 @@ struct zone { * zone reclaim becomes active if more unmapped pages exist. */ unsigned long min_unmapped_pages; + unsigned long min_slab_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -448,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include /* Returns the number of the current Node. */ -- cgit v1.2.3 From c713216deebd95d2b0ab38fef8bb2361c0180c2d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Sep 2006 01:49:43 -0700 Subject: [PATCH] Introduce mechanism for registering active regions of memory At a basic level, architectures define structures to record where active ranges of page frames are located. Once located, the code to calculate zone sizes and holes in each architecture is very similar. Some of this zone and hole sizing code is difficult to read for no good reason. This set of patches eliminates the similar-looking architecture-specific code. The patches introduce a mechanism where architectures register where the active ranges of page frames are with add_active_range(). When all areas have been discovered, free_area_init_nodes() is called to initialise the pgdat and zones. The zone sizes and holes are then calculated in an architecture independent manner. Patch 1 introduces the mechanism for registering and initialising PFN ranges Patch 2 changes ppc to use the mechanism - 139 arch-specific LOC removed Patch 3 changes x86 to use the mechanism - 136 arch-specific LOC removed Patch 4 changes x86_64 to use the mechanism - 74 arch-specific LOC removed Patch 5 changes ia64 to use the mechanism - 52 arch-specific LOC removed Patch 6 accounts for mem_map as a memory hole as the pages are not reclaimable. It adjusts the watermarks slightly Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig, gensparse_defconfig and defconfig. Bob Picco has also tested and debugged on IA64. Jack Steiner successfully boot tested on a mammoth SGI IA64-based machine. These were on patches against 2.6.17-rc1 and release 3 of these patches but there have been no ia64-changes since release 3. There are differences in the zone sizes for x86_64 as the arch-specific code for x86_64 accounts the kernel image and the starting mem_maps as memory holes but the architecture-independent code accounts the memory as present. The big benefit of this set of patches is a sizable reduction of architecture-specific code, some of which is very hairy. There should be a greater reduction when other architectures use the same mechanisms for zone and hole sizing but I lack the hardware to test on. Additional credit; Dave Hansen for the initial suggestion and comments on early patches Andy Whitcroft for reviewing early versions and catching numerous errors Tony Luck for testing and debugging on IA64 Bob Picco for fixing bugs related to pfn registration, reviewing a number of patch revisions, providing a number of suggestions on future direction and testing heavily Jack Steiner and Robin Holt for testing on IA64 and clarifying issues related to memory holes Yasunori for testing on IA64 Andi Kleen for reviewing and feeding back about x86_64 Christian Kujau for providing valuable information related to ACPI problems on x86_64 and testing potential fixes This patch: Define the structure to represent an active range of page frames within a node in an architecture independent manner. Architectures are expected to register active ranges of PFNs using add_active_range(nid, start_pfn, end_pfn) and call free_area_init_nodes() passing the PFNs of the end of each zone. Signed-off-by: Mel Gorman Signed-off-by: Bob Picco Cc: Dave Hansen Cc: Andy Whitcroft Cc: Andi Kleen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Keith Mannthey" Cc: "Luck, Tony" Cc: KAMEZAWA Hiroyuki Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3693f1a52788..7fa1cbe9fa7a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -305,6 +305,13 @@ struct zonelist { struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited }; +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +struct node_active_region { + unsigned long start_pfn; + unsigned long end_pfn; + int nid; +}; +#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ /* * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM @@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone); #endif -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ + !defined(CONFIG_ARCH_POPULATES_NODE_MAP) #define early_pfn_to_nid(nid) (0UL) #endif -- cgit v1.2.3 From e129b5c23c2b471d47f1c5d2b8b193fc2034af43 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Sep 2006 01:50:00 -0700 Subject: [PATCH] vm: add per-zone writeout counter The VM is supposed to minimise the number of pages which get written off the LRU (for IO scheduling efficiency, and for high reclaim-success rates). But we don't actually have a clear way of showing how true this is. So add `nr_vmscan_write' to /proc/vmstat and /proc/zoneinfo - the number of pages which have been written by the vm scanner in this zone and globally. Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7fa1cbe9fa7a..1b0680cd84d2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -58,6 +58,7 @@ enum zone_stat_item { NR_WRITEBACK, NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, + NR_VMSCAN_WRITE, #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ -- cgit v1.2.3 From 5b99cd0effaf846240a15441aec459a592577eaf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 27 Sep 2006 01:50:01 -0700 Subject: [PATCH] own header file for struct page This moves the definition of struct page from mm.h to its own header file page-struct.h. This is a prereq to fix SetPageUptodate which is broken on s390: #define SetPageUptodate(_page) do { struct page *__page = (_page); if (!test_and_set_bit(PG_uptodate, &__page->flags)) page_test_and_clear_dirty(_page); } while (0) _page gets used twice in this macro which can cause subtle bugs. Using __page for the page_test_and_clear_dirty call doesn't work since it causes yet another problem with the page_test_and_clear_dirty macro as well. In order to avoid all these problems caused by macros it seems to be a good idea to get rid of them and convert them to static inline functions. Because of header file include order it's necessary to have a seperate header file for the struct page definition. Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b0680cd84d2..562cf7a8f3ee 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -314,6 +314,11 @@ struct node_active_region { }; #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + /* * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM * (mostly NUMA machines?) to denote a higher-level memory zone than the -- cgit v1.2.3 From d5f541ed6e31518508c688912e7464facf253c87 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 27 Sep 2006 01:50:08 -0700 Subject: [PATCH] Add node to zone for the NUMA case Add the node in order to optimize zone_to_nid. Signed-off-by: Christoph Lameter Acked-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 562cf7a8f3ee..59855b8718a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -168,6 +168,7 @@ struct zone { unsigned long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA + int node; /* * zone reclaim becomes active if more unmapped pages exist. */ -- cgit v1.2.3