Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |   2
-rw-r--r--  mm/filemap.c     |   2
-rw-r--r--  mm/memcontrol.c  |  73
-rw-r--r--  mm/memory.c      |  58
-rw-r--r--  mm/mmu_context.c |   2
-rw-r--r--  mm/page_alloc.c  | 138
-rw-r--r--  mm/page_cgroup.c |   1
-rw-r--r--  mm/slab.c        | 266
-rw-r--r--  mm/swap.c        |  33
-rw-r--r--  mm/vmalloc.c     |  14
-rw-r--r--  mm/vmscan.c      |  18
-rw-r--r--  mm/vmstat.c      |   6
12 files changed, 428 insertions, 185 deletions
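
The hunks below are a PREEMPT_RT conversion of the mm subsystem: code paths that used to rely on local_irq_save() or implicit preemption disabling around per-CPU data (the page allocator's pcp lists, the slab per-CPU caches, the swap pagevecs, the vmstat counters) are switched to named per-CPU locks (pa_lock, slab_lock, rotate_lock, swap_lock) and *_rt() annotations, so the sections stay preemptible on RT while still compiling down to plain interrupt disabling otherwise, as the cpu_lock_irqsave() fallback in mm/page_alloc.c shows.

The largest recurring refactor is in mm/page_alloc.c and mm/slab.c: instead of freeing pages while the per-CPU section is locked, pages are first detached onto a private list under the lock (the new isolate_pcp_pages(), or the per-CPU slab_free_list filled by kmem_freepages(..., delayed)), and the bulk free then runs outside that locked region. A rough userspace sketch of that split, with pthreads standing in for the kernel primitives and all names invented for illustration:

/*
 * Userspace analogue of the isolate_pcp_pages()/free_pcppages_bulk()
 * split below: detach items from a shared list while holding the lock,
 * then do the (potentially slow) freeing after dropping it.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	int payload;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *shared_list;	/* protected by list_lock */

/* Move the whole shared list onto a private head; O(1) under the lock. */
static struct item *isolate_items(void)
{
	struct item *batch;

	pthread_mutex_lock(&list_lock);
	batch = shared_list;
	shared_list = NULL;
	pthread_mutex_unlock(&list_lock);
	return batch;
}

/* The per-item work happens with no lock held. */
static void free_items(struct item *batch)
{
	while (batch) {
		struct item *next = batch->next;

		printf("freeing item %d\n", batch->payload);
		free(batch);
		batch = next;
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct item *it = malloc(sizeof(*it));

		if (!it)
			return 1;
		it->payload = i;
		pthread_mutex_lock(&list_lock);
		it->next = shared_list;
		shared_list = it;
		pthread_mutex_unlock(&list_lock);
	}
	free_items(isolate_items());
	return 0;
}

Only the cheap list manipulation happens under the lock; the free loop, which can be long, no longer runs inside the locked (and on mainline, interrupt-disabled) window.
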
diff --git a/mm/Kconfig b/mm/Kconfig index 011b110365c8..0526445c9b35 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -307,7 +307,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" - depends on X86 && MMU + depends on X86 && MMU && !PREEMPT_RT_FULL select COMPACTION help Transparent Hugepages allows the kernel to use huge pages and diff --git a/mm/filemap.c b/mm/filemap.c index 03c5b0ec2b78..4a30d36ca4c8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2044,7 +2044,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, char *kaddr; size_t copied; - BUG_ON(!in_atomic()); + BUG_ON(!pagefault_disabled()); kaddr = kmap_atomic(page, KM_USER0); if (likely(i->nr_segs == 1)) { int left; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 778554ffd639..abf5497cadcb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -683,37 +683,32 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, return total; } -static bool __memcg_event_check(struct mem_cgroup *memcg, int target) +static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, + enum mem_cgroup_events_target target) { unsigned long val, next; val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); next = __this_cpu_read(memcg->stat->targets[target]); /* from time_after() in jiffies.h */ - return ((long)next - (long)val < 0); -} - -static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target) -{ - unsigned long val, next; - - val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); - - switch (target) { - case MEM_CGROUP_TARGET_THRESH: - next = val + THRESHOLDS_EVENTS_TARGET; - break; - case MEM_CGROUP_TARGET_SOFTLIMIT: - next = val + SOFTLIMIT_EVENTS_TARGET; - break; - case MEM_CGROUP_TARGET_NUMAINFO: - next = val + NUMAINFO_EVENTS_TARGET; - break; - default: - return; + if ((long)next - (long)val < 0) { + switch (target) { + case MEM_CGROUP_TARGET_THRESH: + next = val + THRESHOLDS_EVENTS_TARGET; + break; + case MEM_CGROUP_TARGET_SOFTLIMIT: + next = val + SOFTLIMIT_EVENTS_TARGET; + break; + case MEM_CGROUP_TARGET_NUMAINFO: + next = val + NUMAINFO_EVENTS_TARGET; + break; + default: + break; + } + __this_cpu_write(memcg->stat->targets[target], next); + return true; } - - __this_cpu_write(memcg->stat->targets[target], next); + return false; } /* @@ -724,25 +719,27 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { preempt_disable(); /* threshold event is triggered in finer grain than soft limit */ - if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) { + if (unlikely(mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_THRESH))) { + bool do_softlimit, do_numainfo; + + do_softlimit = mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_SOFTLIMIT); +#if MAX_NUMNODES > 1 + do_numainfo = mem_cgroup_event_ratelimit(memcg, + MEM_CGROUP_TARGET_NUMAINFO); +#endif + preempt_enable(); + mem_cgroup_threshold(memcg); - __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH); - if (unlikely(__memcg_event_check(memcg, - MEM_CGROUP_TARGET_SOFTLIMIT))) { + if (unlikely(do_softlimit)) mem_cgroup_update_tree(memcg, page); - __mem_cgroup_target_update(memcg, - MEM_CGROUP_TARGET_SOFTLIMIT); - } #if MAX_NUMNODES > 1 - if (unlikely(__memcg_event_check(memcg, - MEM_CGROUP_TARGET_NUMAINFO))) { + if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); - __mem_cgroup_target_update(memcg, - MEM_CGROUP_TARGET_NUMAINFO); - } #endif - } - preempt_enable(); + } else + preempt_enable(); } static 
struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) diff --git a/mm/memory.c b/mm/memory.c index 1b1ca176397e..a3ae5e7b0bfc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3444,6 +3444,32 @@ unlock: return 0; } +#ifdef CONFIG_PREEMPT_RT_FULL +void pagefault_disable(void) +{ + migrate_disable(); + current->pagefault_disabled++; + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} +EXPORT_SYMBOL_GPL(pagefault_disable); + +void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + current->pagefault_disabled--; + migrate_enable(); +} +EXPORT_SYMBOL_GPL(pagefault_enable); +#endif + /* * By the time we get here, we already hold the mm semaphore */ @@ -3992,3 +4018,35 @@ void copy_user_huge_page(struct page *dst, struct page *src, } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + +#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0) +/* + * Heinous hack, relies on the caller doing something like: + * + * pte = alloc_pages(PGALLOC_GFP, 0); + * if (pte) + * pgtable_page_ctor(pte); + * return pte; + * + * This ensures we release the page and return NULL when the + * lock allocation fails. + */ +struct page *pte_lock_init(struct page *page) +{ + page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (page->ptl) { + spin_lock_init(__pte_lockptr(page)); + } else { + __free_page(page); + page = NULL; + } + return page; +} + +void pte_lock_deinit(struct page *page) +{ + kfree(page->ptl); + page->mapping = NULL; +} + +#endif diff --git a/mm/mmu_context.c b/mm/mmu_context.c index cf332bc0080a..64ce279a984d 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); + local_irq_disable_rt(); active_mm = tsk->active_mm; if (active_mm != mm) { atomic_inc(&mm->mm_count); @@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm) } tsk->mm = mm; switch_mm(active_mm, mm, tsk); + local_irq_enable_rt(); task_unlock(tsk); if (active_mm != mm) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 485be8976bba..49675c704a5b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -57,6 +57,7 @@ #include <linux/ftrace_event.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> +#include <linux/locallock.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -222,6 +223,18 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); + +#ifdef CONFIG_PREEMPT_RT_BASE +# define cpu_lock_irqsave(cpu, flags) \ + spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags) +# define cpu_unlock_irqrestore(cpu, flags) \ + spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags) +#else +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) +#endif + int page_group_by_mobility_disabled __read_mostly; static void set_pageblock_migratetype(struct page *page, int migratetype) @@ -581,7 +594,7 @@ static inline int free_pages_check(struct page *page) } /* - * Frees a number of pages from the PCP lists + * Frees a number of pages which have been collected from the pcp lists. * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * @@ -592,16 +605,42 @@ static inline int free_pages_check(struct page *page) * pinned" detection logic. 
*/ static void free_pcppages_bulk(struct zone *zone, int count, - struct per_cpu_pages *pcp) + struct list_head *list) { - int migratetype = 0; - int batch_free = 0; int to_free = count; + unsigned long flags; - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); zone->all_unreclaimable = 0; zone->pages_scanned = 0; + while (!list_empty(list)) { + struct page *page = list_first_entry(list, struct page, lru); + + /* must delete as __free_one_page list manipulates */ + list_del(&page->lru); + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ + __free_one_page(page, zone, 0, page_private(page)); + trace_mm_page_pcpu_drain(page, 0, page_private(page)); + to_free--; + } + WARN_ON(to_free != 0); + __mod_zone_page_state(zone, NR_FREE_PAGES, count); + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Moves a number of pages from the PCP lists to free list which + * is freed outside of the locked region. + * + * Assumes all pages on list are in same zone, and of same order. + * count is the number of pages to free. + */ +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src, + struct list_head *dst) +{ + int migratetype = 0, batch_free = 0; + while (to_free) { struct page *page; struct list_head *list; @@ -617,7 +656,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; - list = &pcp->lists[migratetype]; + list = &src->lists[migratetype]; } while (list_empty(list)); /* This is the only non-empty list. Free them all. */ @@ -625,28 +664,25 @@ static void free_pcppages_bulk(struct zone *zone, int count, batch_free = to_free; do { - page = list_entry(list->prev, struct page, lru); - /* must delete as __free_one_page list manipulates */ + page = list_last_entry(list, struct page, lru); list_del(&page->lru); - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, zone, 0, page_private(page)); - trace_mm_page_pcpu_drain(page, 0, page_private(page)); + list_add(&page->lru, dst); } while (--to_free && --batch_free && !list_empty(list)); } - __mod_zone_page_state(zone, NR_FREE_PAGES, count); - spin_unlock(&zone->lock); } static void free_one_page(struct zone *zone, struct page *page, int order, int migratetype) { - spin_lock(&zone->lock); + unsigned long flags; + + spin_lock_irqsave(&zone->lock, flags); zone->all_unreclaimable = 0; zone->pages_scanned = 0; __free_one_page(page, zone, order, migratetype); __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static bool free_pages_prepare(struct page *page, unsigned int order) @@ -683,13 +719,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (!free_pages_prepare(page, order)) return; - local_irq_save(flags); + local_lock_irqsave(pa_lock, flags); if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, order, get_pageblock_migratetype(page)); - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); } /* @@ -1065,16 +1101,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; + LIST_HEAD(dst); int to_drain; - local_irq_save(flags); + local_lock_irqsave(pa_lock, flags); if (pcp->count >= pcp->batch) to_drain = pcp->batch; else to_drain = pcp->count; - free_pcppages_bulk(zone, to_drain, pcp); + isolate_pcp_pages(to_drain, pcp, 
&dst); pcp->count -= to_drain; - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); + free_pcppages_bulk(zone, to_drain, &dst); } #endif @@ -1093,16 +1131,21 @@ static void drain_pages(unsigned int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; + LIST_HEAD(dst); + int count; - local_irq_save(flags); + cpu_lock_irqsave(cpu, flags); pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - if (pcp->count) { - free_pcppages_bulk(zone, pcp->count, pcp); + count = pcp->count; + if (count) { + isolate_pcp_pages(count, pcp, &dst); pcp->count = 0; } - local_irq_restore(flags); + cpu_unlock_irqrestore(cpu, flags); + if (count) + free_pcppages_bulk(zone, count, &dst); } } @@ -1119,7 +1162,14 @@ void drain_local_pages(void *arg) */ void drain_all_pages(void) { +#ifndef CONFIG_PREEMPT_RT_BASE on_each_cpu(drain_local_pages, NULL, 1); +#else + int i; + + for_each_online_cpu(i) + drain_pages(i); +#endif } #ifdef CONFIG_HIBERNATION @@ -1175,7 +1225,7 @@ void free_hot_cold_page(struct page *page, int cold) migratetype = get_pageblock_migratetype(page); set_page_private(page, migratetype); - local_irq_save(flags); + local_lock_irqsave(pa_lock, flags); if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_event(PGFREE); @@ -1202,12 +1252,19 @@ void free_hot_cold_page(struct page *page, int cold) list_add(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { - free_pcppages_bulk(zone, pcp->batch, pcp); + LIST_HEAD(dst); + int count; + + isolate_pcp_pages(pcp->batch, pcp, &dst); pcp->count -= pcp->batch; + count = pcp->batch; + local_unlock_irqrestore(pa_lock, flags); + free_pcppages_bulk(zone, count, &dst); + return; } out: - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); } /* @@ -1302,7 +1359,7 @@ again: struct per_cpu_pages *pcp; struct list_head *list; - local_irq_save(flags); + local_lock_irqsave(pa_lock, flags); pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; if (list_empty(list)) { @@ -1334,17 +1391,19 @@ again: */ WARN_ON_ONCE(order > 1); } - spin_lock_irqsave(&zone->lock, flags); + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); page = __rmqueue(zone, order, migratetype); - spin_unlock(&zone->lock); - if (!page) + if (!page) { + spin_unlock(&zone->lock); goto failed; + } __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); + spin_unlock(&zone->lock); } __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1352,7 +1411,7 @@ again: return page; failed: - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); return NULL; } @@ -1901,8 +1960,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, if (*did_some_progress != COMPACT_SKIPPED) { /* Page migration frees to the PCP lists but we want merging */ - drain_pages(get_cpu()); - put_cpu(); + drain_pages(get_cpu_light()); + put_cpu_light(); page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, @@ -3680,14 +3739,16 @@ static int __zone_pcp_update(void *data) for_each_possible_cpu(cpu) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; + LIST_HEAD(dst); pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - local_irq_save(flags); - free_pcppages_bulk(zone, pcp->count, pcp); + cpu_lock_irqsave(cpu, flags); + 
isolate_pcp_pages(pcp->count, pcp, &dst); + free_pcppages_bulk(zone, pcp->count, &dst); setup_pageset(pset, batch); - local_irq_restore(flags); + cpu_unlock_irqrestore(cpu, flags); } return 0; } @@ -5053,6 +5114,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); + local_irq_lock_init(pa_lock); } /* diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 2d123f94a8df..2e0d18d8e432 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -17,6 +17,7 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) set_page_cgroup_array_id(pc, id); pc->mem_cgroup = NULL; INIT_LIST_HEAD(&pc->lru); + page_cgroup_lock_init(pc); } static unsigned long total_usage; diff --git a/mm/slab.c b/mm/slab.c index 83311c9aaf9d..5f0c5ef303d4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -116,6 +116,7 @@ #include <linux/kmemcheck.h> #include <linux/memory.h> #include <linux/prefetch.h> +#include <linux/locallock.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -607,6 +608,12 @@ int slab_is_available(void) return g_cpucache_up >= EARLY; } +/* + * Guard access to the cache-chain. + */ +static DEFINE_MUTEX(cache_chain_mutex); +static struct list_head cache_chain; + #ifdef CONFIG_LOCKDEP /* @@ -668,38 +675,41 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) slab_set_debugobj_lock_classes_node(cachep, node); } -static void init_node_lock_keys(int q) +static void init_lock_keys(struct kmem_cache *cachep, int node) { - struct cache_sizes *s = malloc_sizes; + struct kmem_list3 *l3; if (g_cpucache_up < LATE) return; - for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { - struct kmem_list3 *l3; + l3 = cachep->nodelists[node]; + if (!l3 || OFF_SLAB(cachep)) + return; - l3 = s->cs_cachep->nodelists[q]; - if (!l3 || OFF_SLAB(s->cs_cachep)) - continue; + slab_set_lock_classes(cachep, &on_slab_l3_key, &on_slab_alc_key, node); +} - slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, - &on_slab_alc_key, q); - } +static void init_node_lock_keys(int node) +{ + struct kmem_cache *cachep; + + list_for_each_entry(cachep, &cache_chain, next) + init_lock_keys(cachep, node); } -static inline void init_lock_keys(void) +static inline void init_cachep_lock_keys(struct kmem_cache *cachep) { int node; for_each_node(node) - init_node_lock_keys(node); + init_lock_keys(cachep, node); } #else -static void init_node_lock_keys(int q) +static void init_node_lock_keys(int node) { } -static inline void init_lock_keys(void) +static void init_cachep_lock_keys(struct kmem_cache *cachep) { } @@ -712,19 +722,67 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) } #endif +static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); +static DEFINE_PER_CPU(struct list_head, slab_free_list); +static DEFINE_LOCAL_IRQ_LOCK(slab_lock); + +#ifndef CONFIG_PREEMPT_RT_BASE +# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1) +#else /* - * Guard access to the cache-chain. + * execute func() for all CPUs. On PREEMPT_RT we dont actually have + * to run on the remote CPUs - we only have to take their CPU-locks. + * (This is a rare operation, so cacheline bouncing is not an issue.) 
*/ -static DEFINE_MUTEX(cache_chain_mutex); -static struct list_head cache_chain; +static void +slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) +{ + unsigned int i; -static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); + for_each_online_cpu(i) + func(arg, i); +} +#endif + +static void free_delayed(struct list_head *h) +{ + while(!list_empty(h)) { + struct page *page = list_first_entry(h, struct page, lru); + + list_del(&page->lru); + __free_pages(page, page->index); + } +} + +static void unlock_l3_and_free_delayed(spinlock_t *list_lock) +{ + LIST_HEAD(tmp); + + list_splice_init(&__get_cpu_var(slab_free_list), &tmp); + local_spin_unlock_irq(slab_lock, list_lock); + free_delayed(&tmp); +} + +static void unlock_slab_and_free_delayed(unsigned long flags) +{ + LIST_HEAD(tmp); + + list_splice_init(&__get_cpu_var(slab_free_list), &tmp); + local_unlock_irqrestore(slab_lock, flags); + free_delayed(&tmp); +} static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { return cachep->array[smp_processor_id()]; } +static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep, + int cpu) +{ + return cachep->array[cpu]; +} + static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) { @@ -1062,9 +1120,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) if (l3->alien) { struct array_cache *ac = l3->alien[node]; - if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { + if (ac && ac->avail && + local_spin_trylock_irq(slab_lock, &ac->lock)) { __drain_alien_cache(cachep, ac, node); - spin_unlock_irq(&ac->lock); + local_spin_unlock_irq(slab_lock, &ac->lock); } } } @@ -1079,9 +1138,9 @@ static void drain_alien_cache(struct kmem_cache *cachep, for_each_online_node(i) { ac = alien[i]; if (ac) { - spin_lock_irqsave(&ac->lock, flags); + local_spin_lock_irqsave(slab_lock, &ac->lock, flags); __drain_alien_cache(cachep, ac, i); - spin_unlock_irqrestore(&ac->lock, flags); + local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags); } } } @@ -1160,11 +1219,11 @@ static int init_cache_nodelists_node(int node) cachep->nodelists[node] = l3; } - spin_lock_irq(&cachep->nodelists[node]->list_lock); + local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock); cachep->nodelists[node]->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock); } return 0; } @@ -1189,7 +1248,7 @@ static void __cpuinit cpuup_canceled(long cpu) if (!l3) goto free_array_cache; - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; @@ -1197,7 +1256,7 @@ static void __cpuinit cpuup_canceled(long cpu) free_block(cachep, nc->entry, nc->avail, node); if (!cpumask_empty(mask)) { - spin_unlock_irq(&l3->list_lock); + unlock_l3_and_free_delayed(&l3->list_lock); goto free_array_cache; } @@ -1211,7 +1270,7 @@ static void __cpuinit cpuup_canceled(long cpu) alien = l3->alien; l3->alien = NULL; - spin_unlock_irq(&l3->list_lock); + unlock_l3_and_free_delayed(&l3->list_lock); kfree(shared); if (alien) { @@ -1285,7 +1344,7 @@ static int __cpuinit cpuup_prepare(long cpu) l3 = cachep->nodelists[node]; BUG_ON(!l3); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (!l3->shared) { /* * We are serialised from CPU_DEAD or @@ -1300,7 +1359,7 @@ static int __cpuinit 
cpuup_prepare(long cpu) alien = NULL; } #endif - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); kfree(shared); free_alien_cache(alien); if (cachep->flags & SLAB_DEBUG_OBJECTS) @@ -1491,6 +1550,10 @@ void __init kmem_cache_init(void) if (num_possible_nodes() == 1) use_alien_caches = 0; + local_irq_lock_init(slab_lock); + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(slab_free_list, i)); + for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); if (i < MAX_NUMNODES) @@ -1669,14 +1732,13 @@ void __init kmem_cache_init_late(void) g_cpucache_up = LATE; - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); - /* 6) resize the head arrays to their final sizes */ mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) + list_for_each_entry(cachep, &cache_chain, next) { + init_cachep_lock_keys(cachep); if (enable_cpucache(cachep, GFP_NOWAIT)) BUG(); + } mutex_unlock(&cache_chain_mutex); /* Done! */ @@ -1769,12 +1831,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) /* * Interface to system's page release. */ -static void kmem_freepages(struct kmem_cache *cachep, void *addr) +static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed) { unsigned long i = (1 << cachep->gfporder); - struct page *page = virt_to_page(addr); + struct page *page, *basepage = virt_to_page(addr); const unsigned long nr_freed = i; + page = basepage; + kmemcheck_free_shadow(page, cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) @@ -1790,7 +1854,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) } if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; - free_pages((unsigned long)addr, cachep->gfporder); + + if (!delayed) { + free_pages((unsigned long)addr, cachep->gfporder); + } else { + basepage->index = cachep->gfporder; + list_add(&basepage->lru, &__get_cpu_var(slab_free_list)); + } } static void kmem_rcu_free(struct rcu_head *head) @@ -1798,7 +1868,7 @@ static void kmem_rcu_free(struct rcu_head *head) struct slab_rcu *slab_rcu = (struct slab_rcu *)head; struct kmem_cache *cachep = slab_rcu->cachep; - kmem_freepages(cachep, slab_rcu->addr); + kmem_freepages(cachep, slab_rcu->addr, false); if (OFF_SLAB(cachep)) kmem_cache_free(cachep->slabp_cache, slab_rcu); } @@ -2017,7 +2087,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab * Before calling the slab must have been unlinked from the cache. The * cache-lock is not held/needed. 
*/ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp, + bool delayed) { void *addr = slabp->s_mem - slabp->colouroff; @@ -2030,7 +2101,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) slab_rcu->addr = addr; call_rcu(&slab_rcu->head, kmem_rcu_free); } else { - kmem_freepages(cachep, addr); + kmem_freepages(cachep, addr, delayed); if (OFF_SLAB(cachep)) kmem_cache_free(cachep->slabp_cache, slabp); } @@ -2479,6 +2550,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, slab_set_debugobj_lock_classes(cachep); } + init_cachep_lock_keys(cachep); + /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); oops: @@ -2496,7 +2569,7 @@ EXPORT_SYMBOL(kmem_cache_create); #if DEBUG static void check_irq_off(void) { - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); } static void check_irq_on(void) @@ -2531,26 +2604,43 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, struct array_cache *ac, int force, int node); -static void do_drain(void *arg) +static void __do_drain(void *arg, unsigned int cpu) { struct kmem_cache *cachep = arg; struct array_cache *ac; - int node = numa_mem_id(); + int node = cpu_to_mem(cpu); - check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get_on_cpu(cachep, cpu); spin_lock(&cachep->nodelists[node]->list_lock); free_block(cachep, ac->entry, ac->avail, node); spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } +#ifndef CONFIG_PREEMPT_RT_BASE +static void do_drain(void *arg) +{ + __do_drain(arg, smp_processor_id()); +} +#else +static void do_drain(void *arg, int cpu) +{ + LIST_HEAD(tmp); + + spin_lock_irq(&per_cpu(slab_lock, cpu).lock); + __do_drain(arg, cpu); + list_splice_init(&per_cpu(slab_free_list, cpu), &tmp); + spin_unlock_irq(&per_cpu(slab_lock, cpu).lock); + free_delayed(&tmp); +} +#endif + static void drain_cpu_caches(struct kmem_cache *cachep) { struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1); + slab_on_each_cpu(do_drain, cachep); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -2581,10 +2671,10 @@ static int drain_freelist(struct kmem_cache *cache, nr_freed = 0; while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); p = l3->slabs_free.prev; if (p == &l3->slabs_free) { - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); goto out; } @@ -2598,8 +2688,8 @@ static int drain_freelist(struct kmem_cache *cache, * to the cache. 
*/ l3->free_objects -= cache->num; - spin_unlock_irq(&l3->list_lock); - slab_destroy(cache, slabp); + local_spin_unlock_irq(slab_lock, &l3->list_lock); + slab_destroy(cache, slabp, false); nr_freed++; } out: @@ -2893,7 +2983,7 @@ static int cache_grow(struct kmem_cache *cachep, offset *= cachep->colour_off; if (local_flags & __GFP_WAIT) - local_irq_enable(); + local_unlock_irq(slab_lock); /* * The test for missing atomic flag is performed here, rather than @@ -2923,7 +3013,7 @@ static int cache_grow(struct kmem_cache *cachep, cache_init_objs(cachep, slabp); if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); check_irq_off(); spin_lock(&l3->list_lock); @@ -2934,10 +3024,10 @@ static int cache_grow(struct kmem_cache *cachep, spin_unlock(&l3->list_lock); return 1; opps1: - kmem_freepages(cachep, objp); + kmem_freepages(cachep, objp, false); failed: if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); return 0; } @@ -3329,11 +3419,11 @@ retry: * set and go into memory reserves if necessary. */ if (local_flags & __GFP_WAIT) - local_irq_enable(); + local_unlock_irq(slab_lock); kmem_flagcheck(cache, flags); obj = kmem_getpages(cache, local_flags, numa_mem_id()); if (local_flags & __GFP_WAIT) - local_irq_disable(); + local_lock_irq(slab_lock); if (obj) { /* * Insert into the appropriate per node queues @@ -3449,7 +3539,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + local_lock_irqsave(slab_lock, save_flags); if (nodeid == NUMA_NO_NODE) nodeid = slab_node; @@ -3474,7 +3564,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, /* ___cache_alloc_node can fall back to other nodes */ ptr = ____cache_alloc_node(cachep, flags, nodeid); out: - local_irq_restore(save_flags); + local_unlock_irqrestore(slab_lock, save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, flags); @@ -3534,9 +3624,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + local_lock_irqsave(slab_lock, save_flags); objp = __do_cache_alloc(cachep, flags); - local_irq_restore(save_flags); + local_unlock_irqrestore(slab_lock, save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, flags); @@ -3584,7 +3674,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, * a different cache, refer to comments before * alloc_slabmgmt. 
*/ - slab_destroy(cachep, slabp); + slab_destroy(cachep, slabp, true); } else { list_add(&slabp->list, &l3->slabs_free); } @@ -3847,12 +3937,12 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; - local_irq_save(flags); debug_check_no_locks_freed(objp, obj_size(cachep)); if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(objp, obj_size(cachep)); + local_lock_irqsave(slab_lock, flags); __cache_free(cachep, objp, __builtin_return_address(0)); - local_irq_restore(flags); + unlock_slab_and_free_delayed(flags); trace_kmem_cache_free(_RET_IP_, objp); } @@ -3876,13 +3966,13 @@ void kfree(const void *objp) if (unlikely(ZERO_OR_NULL_PTR(objp))) return; - local_irq_save(flags); kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); debug_check_no_obj_freed(objp, obj_size(c)); + local_lock_irqsave(slab_lock, flags); __cache_free(c, (void *)objp, __builtin_return_address(0)); - local_irq_restore(flags); + unlock_slab_and_free_delayed(flags); } EXPORT_SYMBOL(kfree); @@ -3925,7 +4015,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) if (l3) { struct array_cache *shared = l3->shared; - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (shared) free_block(cachep, shared->entry, @@ -3938,7 +4028,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) } l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&l3->list_lock); + unlock_l3_and_free_delayed(&l3->list_lock); + kfree(shared); free_alien_cache(new_alien); continue; @@ -3985,17 +4076,30 @@ struct ccupdate_struct { struct array_cache *new[0]; }; -static void do_ccupdate_local(void *info) +static void __do_ccupdate_local(void *info, int cpu) { struct ccupdate_struct *new = info; struct array_cache *old; - check_irq_off(); - old = cpu_cache_get(new->cachep); + old = cpu_cache_get_on_cpu(new->cachep, cpu); + + new->cachep->array[cpu] = new->new[cpu]; + new->new[cpu] = old; +} - new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; - new->new[smp_processor_id()] = old; +#ifndef CONFIG_PREEMPT_RT_BASE +static void do_ccupdate_local(void *info) +{ + __do_ccupdate_local(info, smp_processor_id()); } +#else +static void do_ccupdate_local(void *info, int cpu) +{ + spin_lock_irq(&per_cpu(slab_lock, cpu).lock); + __do_ccupdate_local(info, cpu); + spin_unlock_irq(&per_cpu(slab_lock, cpu).lock); +} +#endif /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, @@ -4021,7 +4125,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, } new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1); + slab_on_each_cpu(do_ccupdate_local, (void *)new); check_irq_on(); cachep->batchcount = batchcount; @@ -4032,9 +4136,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, struct array_cache *ccold = new->new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + local_spin_lock_irq(slab_lock, + &cachep->nodelists[cpu_to_mem(i)]->list_lock); free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + + unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock); kfree(ccold); } kfree(new); @@ -4110,7 +4216,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, if (ac->touched && !force) { ac->touched = 0; } else { - 
spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); if (ac->avail) { tofree = force ? ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) @@ -4120,7 +4226,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } } @@ -4259,7 +4365,7 @@ static int s_show(struct seq_file *m, void *p) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); list_for_each_entry(slabp, &l3->slabs_full, list) { if (slabp->inuse != cachep->num && !error) @@ -4284,7 +4390,7 @@ static int s_show(struct seq_file *m, void *p) if (l3->shared) shared_avail += l3->shared->avail; - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ -4513,13 +4619,13 @@ static int leaks_show(struct seq_file *m, void *p) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + local_spin_lock_irq(slab_lock, &l3->list_lock); list_for_each_entry(slabp, &l3->slabs_full, list) handle_slab(n, cachep, slabp); list_for_each_entry(slabp, &l3->slabs_partial, list) handle_slab(n, cachep, slabp); - spin_unlock_irq(&l3->list_lock); + local_spin_unlock_irq(slab_lock, &l3->list_lock); } name = cachep->name; if (n[0] == n[1]) { diff --git a/mm/swap.c b/mm/swap.c index 55b266dcfbeb..e3f7d6fa3192 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -31,6 +31,7 @@ #include <linux/backing-dev.h> #include <linux/memcontrol.h> #include <linux/gfp.h> +#include <linux/locallock.h> #include "internal.h" @@ -41,6 +42,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); +static DEFINE_LOCAL_IRQ_LOCK(swap_lock); + /* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. 
@@ -267,11 +271,11 @@ void rotate_reclaimable_page(struct page *page) unsigned long flags; page_cache_get(page); - local_irq_save(flags); + local_lock_irqsave(rotate_lock, flags); pvec = &__get_cpu_var(lru_rotate_pvecs); if (!pagevec_add(pvec, page)) pagevec_move_tail(pvec); - local_irq_restore(flags); + local_unlock_irqrestore(rotate_lock, flags); } } @@ -327,12 +331,13 @@ static void activate_page_drain(int cpu) void activate_page(struct page *page) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); + struct pagevec *pvec = &get_locked_var(swap_lock, + activate_page_pvecs); page_cache_get(page); if (!pagevec_add(pvec, page)) pagevec_lru_move_fn(pvec, __activate_page, NULL); - put_cpu_var(activate_page_pvecs); + put_locked_var(swap_lock, activate_page_pvecs); } } @@ -373,12 +378,12 @@ EXPORT_SYMBOL(mark_page_accessed); void __lru_cache_add(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; + struct pagevec *pvec = &get_locked_var(swap_lock, lru_add_pvecs)[lru]; page_cache_get(page); if (!pagevec_add(pvec, page)) ____pagevec_lru_add(pvec, lru); - put_cpu_var(lru_add_pvecs); + put_locked_var(swap_lock, lru_add_pvecs); } EXPORT_SYMBOL(__lru_cache_add); @@ -512,9 +517,9 @@ static void drain_cpu_pagevecs(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ - local_irq_save(flags); + local_lock_irqsave(rotate_lock, flags); pagevec_move_tail(pvec); - local_irq_restore(flags); + local_unlock_irqrestore(rotate_lock, flags); } pvec = &per_cpu(lru_deactivate_pvecs, cpu); @@ -542,18 +547,19 @@ void deactivate_page(struct page *page) return; if (likely(get_page_unless_zero(page))) { - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); + struct pagevec *pvec = &get_locked_var(swap_lock, + lru_deactivate_pvecs); if (!pagevec_add(pvec, page)) pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); - put_cpu_var(lru_deactivate_pvecs); + put_locked_var(swap_lock, lru_deactivate_pvecs); } } void lru_add_drain(void) { - drain_cpu_pagevecs(get_cpu()); - put_cpu(); + drain_cpu_pagevecs(local_lock_cpu(swap_lock)); + local_unlock_cpu(swap_lock); } static void lru_add_drain_per_cpu(struct work_struct *dummy) @@ -783,6 +789,9 @@ void __init swap_setup(void) { unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); + local_irq_lock_init(rotate_lock); + local_irq_lock_init(swap_lock); + #ifdef CONFIG_SWAP bdi_init(swapper_space.backing_dev_info); #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 27be2f0d4cb7..071155a6f83b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -782,7 +782,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; - int node, err; + int node, err, cpu; node = numa_node_id(); @@ -821,12 +821,13 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) BUG_ON(err); radix_tree_preload_end(); - vbq = &get_cpu_var(vmap_block_queue); + cpu = get_cpu_light(); + vbq = &__get_cpu_var(vmap_block_queue); vb->vbq = vbq; spin_lock(&vbq->lock); list_add_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); - put_cpu_var(vmap_block_queue); + put_cpu_light(); return vb; } @@ -900,7 +901,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; unsigned long addr = 0; unsigned int order; - int purge = 0; + int purge = 0, cpu; BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); @@ -908,7 +909,8 @@ static void 
*vb_alloc(unsigned long size, gfp_t gfp_mask) again: rcu_read_lock(); - vbq = &get_cpu_var(vmap_block_queue); + cpu = get_cpu_light(); + vbq = &__get_cpu_var(vmap_block_queue); list_for_each_entry_rcu(vb, &vbq->free, free_list) { int i; @@ -945,7 +947,7 @@ next: if (purge) purge_fragmented_blocks_thiscpu(); - put_cpu_var(vmap_block_queue); + put_cpu_light(); rcu_read_unlock(); if (!addr) { diff --git a/mm/vmscan.c b/mm/vmscan.c index cb33d9cd4d65..39f9629346a1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1344,8 +1344,8 @@ static int too_many_isolated(struct zone *zone, int file, */ static noinline_for_stack void putback_lru_pages(struct zone *zone, struct scan_control *sc, - unsigned long nr_anon, unsigned long nr_file, - struct list_head *page_list) + unsigned long nr_anon, unsigned long nr_file, + struct list_head *page_list, unsigned long nr_reclaimed) { struct page *page; struct pagevec pvec; @@ -1356,7 +1356,12 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, /* * Put back any unfreeable pages. */ - spin_lock(&zone->lru_lock); + spin_lock_irq(&zone->lru_lock); + + if (current_is_kswapd()) + __count_vm_events(KSWAPD_STEAL, nr_reclaimed); + __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed); + while (!list_empty(page_list)) { int lru; page = lru_to_page(page_list); @@ -1539,12 +1544,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, priority, &nr_dirty, &nr_writeback); } - local_irq_disable(); - if (current_is_kswapd()) - __count_vm_events(KSWAPD_STEAL, nr_reclaimed); - __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed); - - putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list); + putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list, nr_reclaimed); /* * If reclaim is isolating dirty pages under writeback, it implies diff --git a/mm/vmstat.c b/mm/vmstat.c index 8fd603b1665e..726f0b6b0508 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, long x; long t; + preempt_disable_rt(); x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); @@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, x = 0; } __this_cpu_write(*p, x); + preempt_enable_rt(); } EXPORT_SYMBOL(__mod_zone_page_state); @@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; + preempt_disable_rt(); v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { @@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); } + preempt_enable_rt(); } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) @@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; + preempt_disable_rt(); v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { @@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(v - overstep, zone, item); __this_cpu_write(*p, overstep); } + preempt_enable_rt(); } void __dec_zone_page_state(struct page *page, enum zone_stat_item item) |
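
Two smaller changes above are worth calling out. mm/memory.c adds RT-specific pagefault_disable()/pagefault_enable() that track a per-task pagefault_disabled counter (bracketed by migrate_disable()/migrate_enable() and compiler barriers) instead of leaning on the preempt count, and mm/filemap.c accordingly tests pagefault_disabled() rather than in_atomic(). A minimal userspace analogue of that nestable, explicitly tracked "no faults allowed here" state, with invented names:

/*
 * Userspace analogue of the per-task pagefault_disabled counter added
 * in mm/memory.c above: a nestable thread-local depth count with
 * compiler barriers, queried instead of "are we atomic?".
 */
#include <assert.h>
#include <stdio.h>

static _Thread_local int pagefault_disabled_depth;

static void my_pagefault_disable(void)
{
	pagefault_disabled_depth++;
	/* order the store before any code that may fault */
	__asm__ __volatile__("" ::: "memory");
}

static void my_pagefault_enable(void)
{
	__asm__ __volatile__("" ::: "memory");
	pagefault_disabled_depth--;
}

static int my_pagefault_disabled(void)
{
	return pagefault_disabled_depth > 0;
}

int main(void)
{
	my_pagefault_disable();
	my_pagefault_disable();		/* nesting is allowed */
	assert(my_pagefault_disabled());
	my_pagefault_enable();
	my_pagefault_enable();
	printf("disabled now? %d\n", my_pagefault_disabled());
	return 0;
}

Separately, mm/memcontrol.c folds __memcg_event_check() and __mem_cgroup_target_update() into a single mem_cgroup_event_ratelimit() helper, so the wrap-safe "did the counter pass its target?" test and the advancing of the target can no longer be done independently. A self-contained sketch of that pattern (the 128-event interval is an arbitrary demo value):

/*
 * Sketch of the mem_cgroup_event_ratelimit() pattern above: a wrap-safe
 * "counter passed its target" check that also advances the target when
 * it fires, so check and update stay in sync.
 */
#include <stdbool.h>
#include <stdio.h>

#define THRESH_INTERVAL 128UL

static unsigned long events;	/* monotonically increasing, may wrap */
static unsigned long target;	/* next value of 'events' that fires */

static bool event_ratelimit(void)
{
	/* same trick as time_after(): signed difference survives wrap */
	if ((long)target - (long)events < 0) {
		target = events + THRESH_INTERVAL;
		return true;
	}
	return false;
}

int main(void)
{
	for (unsigned long i = 0; i < 1000; i++) {
		events++;
		if (event_ratelimit())
			printf("fired at %lu, next target %lu\n", events, target);
	}
	return 0;
}

The (long) subtraction is the same idiom time_after() uses in jiffies.h, so the check keeps working when the event counter wraps around.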