summaryrefslogtreecommitdiff
path: root/mm/vmstat.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 17:02:58 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 17:02:58 -0800
commit72eb6a791459c87a0340318840bb3bd9252b627b (patch)
tree3bfb8ad99f9c7e511f37f72d57b56a2cea06d753 /mm/vmstat.c
parent23d69b09b78c4876e134f104a3814c30747c53f1 (diff)
parent55ee4ef30241a62b700f79517e6d5ef2ddbefa67 (diff)
Merge branch 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (30 commits) gameport: use this_cpu_read instead of lookup x86: udelay: Use this_cpu_read to avoid address calculation x86: Use this_cpu_inc_return for nmi counter x86: Replace uses of current_cpu_data with this_cpu ops x86: Use this_cpu_ops to optimize code vmstat: User per cpu atomics to avoid interrupt disable / enable irq_work: Use per cpu atomics instead of regular atomics cpuops: Use cmpxchg for xchg to avoid lock semantics x86: this_cpu_cmpxchg and this_cpu_xchg operations percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support percpu,x86: relocate this_cpu_add_return() and friends connector: Use this_cpu operations xen: Use this_cpu_inc_return taskstats: Use this_cpu_ops random: Use this_cpu_inc_return fs: Use this_cpu_inc_return in buffer.c highmem: Use this_cpu_xx_return() operations vmstat: Use this_cpu_inc_return for vm statistics x86: Support for this_cpu_add, sub, dec, inc_return percpu: Generic support for this_cpu_add, sub, dec, inc_return ... Fixed up conflicts: in arch/x86/kernel/{apic/nmi.c, apic/x2apic_uv_x.c, process.c} as per Tejun.
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--mm/vmstat.c149
1 files changed, 113 insertions, 36 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 33c33e7a0f9b..312d728976f1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -167,36 +167,24 @@ static void refresh_zone_stat_thresholds(void)
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
int delta)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-
- s8 *p = pcp->vm_stat_diff + item;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
long x;
+ long t;
+
+ x = delta + __this_cpu_read(*p);
- x = delta + *p;
+ t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
+ if (unlikely(x > t || x < -t)) {
zone_page_state_add(x, zone, item);
x = 0;
}
- *p = x;
+ __this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);
/*
- * For an unknown interrupt state
- */
-void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
- int delta)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_zone_page_state(zone, item, delta);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_zone_page_state);
-
-/*
* Optimized increment and decrement functions.
*
* These are only for a single page and therefore can take a struct page *
@@ -221,16 +209,17 @@ EXPORT_SYMBOL(mod_zone_page_state);
*/
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
- s8 *p = pcp->vm_stat_diff + item;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
- (*p)++;
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v > t)) {
+ s8 overstep = t >> 1;
- if (unlikely(*p > pcp->stat_threshold)) {
- int overstep = pcp->stat_threshold / 2;
-
- zone_page_state_add(*p + overstep, zone, item);
- *p = -overstep;
+ zone_page_state_add(v + overstep, zone, item);
+ __this_cpu_write(*p, -overstep);
}
}
@@ -242,16 +231,17 @@ EXPORT_SYMBOL(__inc_zone_page_state);
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
- s8 *p = pcp->vm_stat_diff + item;
-
- (*p)--;
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
- if (unlikely(*p < - pcp->stat_threshold)) {
- int overstep = pcp->stat_threshold / 2;
+ v = __this_cpu_dec_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v < - t)) {
+ s8 overstep = t >> 1;
- zone_page_state_add(*p - overstep, zone, item);
- *p = overstep;
+ zone_page_state_add(v - overstep, zone, item);
+ __this_cpu_write(*p, overstep);
}
}
@@ -261,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
}
EXPORT_SYMBOL(__dec_zone_page_state);
+#ifdef CONFIG_CMPXCHG_LOCAL
+/*
+ * If we have cmpxchg_local support then we do not need to incur the overhead
+ * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
+ *
+ * mod_state() modifies the zone counter state through atomic per cpu
+ * operations.
+ *
+ * Overstep mode specifies how overstep should handled:
+ * 0 No overstepping
+ * 1 Overstepping half of threshold
+ * -1 Overstepping minus half of threshold
+*/
+static inline void mod_state(struct zone *zone,
+ enum zone_stat_item item, int delta, int overstep_mode)
+{
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ long o, n, t, z;
+
+ do {
+ z = 0; /* overflow to zone counters */
+
+ /*
+ * The fetching of the stat_threshold is racy. We may apply
+ * a counter threshold to the wrong the cpu if we get
+ * rescheduled while executing here. However, the following
+ * will apply the threshold again and therefore bring the
+ * counter under the threshold.
+ */
+ t = this_cpu_read(pcp->stat_threshold);
+
+ o = this_cpu_read(*p);
+ n = delta + o;
+
+ if (n > t || n < -t) {
+ int os = overstep_mode * (t >> 1) ;
+
+ /* Overflow must be added to zone counters */
+ z = n + os;
+ n = -os;
+ }
+ } while (this_cpu_cmpxchg(*p, o, n) != o);
+
+ if (z)
+ zone_page_state_add(z, zone, item);
+}
+
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ mod_state(zone, item, delta, 0);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+ mod_state(zone, item, 1, 1);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ mod_state(page_zone(page), item, 1, 1);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ mod_state(page_zone(page), item, -1, -1);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+#else
+/*
+ * Use interrupt disable to serialize counter updates
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_zone_page_state(zone, item, delta);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
unsigned long flags;
@@ -291,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
+#endif
/*
* Update the zone counters for one cpu.