From f45755b8346f1a089ca7957dce5f4c3c6cb26e6f Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Fri, 13 Aug 2010 15:19:11 +0800
Subject: KVM: fix poison overwritten caused by using wrong xstate size

fpu.state is allocated from task_xstate_cachep, the size of task_xstate_cachep
is xstate_size. xstate_size is set from cpuid instruction, which is often
smaller than sizeof(struct xsave_struct). kvm is using sizeof(struct xsave_struct)
to fill in/out fpu.state.xsave, as what we allocated for fpu.state is
xstate_size, kernel will write out of memory and caused poison/redzone/padding
overwritten warnings.

Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Reviewed-by: Sheng Yang <sheng@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/i387.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index c4444bce8469..f855658d2784 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -40,6 +40,7 @@
 
 static unsigned int		mxcsr_feature_mask __read_mostly = 0xffffffffu;
 unsigned int xstate_size;
+EXPORT_SYMBOL_GPL(xstate_size);
 unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
 static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
-- 
cgit v1.2.3


From 351af0725e5222e35741011d1ea62215c1ed06db Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Fri, 6 Aug 2010 13:39:08 +0800
Subject: perf, x86: Fix Intel-nhm PMU programming errata workaround

Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented
workaround we implemented in:

 11164cd: perf, x86: Add Nehelem PMU programming errata workaround

doesn't actually work fully and causes a stuck PMU state
under load and non-functioning perf profiling.

A functional workaround was found by trial & error.

Affects all Nehalem-class Intel PMUs.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 81 ++++++++++++++++++++++++++--------
 1 file changed, 63 insertions(+), 18 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..d8d86d014008 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  *   Intel Errata AAP53  (model 30)
  *   Intel Errata BD53   (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
  */
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
 {
-	if (added) {
-		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-		int i;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	static const unsigned long nhm_magic[4] = {
+		0x4300B5,
+		0x4300D2,
+		0x4300B1,
+		0x4300B1
+	};
+	struct perf_event *event;
+	int i;
+
+	/*
+	 * The Errata requires below steps:
+	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
+	 *    the corresponding PMCx;
+	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+	 */
+
+	/*
+	 * The real steps we choose are a little different from above.
+	 * A) To reduce MSR operations, we don't run step 1) as they
+	 *    are already cleared before this function is called;
+	 * B) Call x86_perf_event_update to save PMCx before configuring
+	 *    PERFEVTSELx with magic number;
+	 * C) With step 5), we do clear only when the PERFEVTSELx is
+	 *    not used currently.
+	 * D) Call x86_perf_event_set_period to restore PMCx;
+	 */
 
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+	/* We always operate 4 pairs of PERF Counters */
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+		if (event)
+			x86_perf_event_update(event);
+	}
 
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+	for (i = 0; i < 4; i++) {
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+	}
 
-		for (i = 0; i < 3; i++) {
-			struct perf_event *event = cpuc->events[i];
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
 
-			if (!event)
-				continue;
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
 
+		if (event) {
+			x86_perf_event_set_period(event);
 			__x86_pmu_enable_event(&event->hw,
-					       ARCH_PERFMON_EVENTSEL_ENABLE);
-		}
+					ARCH_PERFMON_EVENTSEL_ENABLE);
+		} else
+			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
 	}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+	if (added)
+		intel_pmu_nhm_workaround();
 	intel_pmu_enable_all(added);
 }
 
-- 
cgit v1.2.3


From 07a7795ca2e6e66d00b184efb46bd0e23d90d3fe Mon Sep 17 00:00:00 2001
From: Hans Rosenfeld <hans.rosenfeld@amd.com>
Date: Wed, 18 Aug 2010 16:19:50 +0200
Subject: x86, cpu: Fix regression in AMD errata checking code

A bug in the family-model-stepping matching code caused the presence of
errata to go undetected when OSVW was not used. This causes hangs on
some K8 systems because the E400 workaround is not enabled.

Signed-off-by: Hans Rosenfeld <hans.rosenfeld@amd.com>
LKML-Reference: <1282141190-930137-1-git-send-email-hans.rosenfeld@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60a57b13082d..ba5f62f45f01 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum)
 	}
 
 	/* OSVW unavailable or ID unknown, match family-model-stepping range */
-	ms = (cpu->x86_model << 8) | cpu->x86_mask;
+	ms = (cpu->x86_model << 4) | cpu->x86_mask;
 	while ((range = *erratum++))
 		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
 		    (ms >= AMD_MODEL_RANGE_START(range)) &&
-- 
cgit v1.2.3


From fd89a137924e0710078c3ae855e7cec1c43cb845 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 16 Aug 2010 14:38:33 +0200
Subject: x86-32: Separate 1:1 pagetables from swapper_pg_dir

This patch fixes machine crashes which occur when heavily exercising the
CPU hotplug codepaths on a 32-bit kernel. These crashes are caused by
AMD Erratum 383 and result in a fatal machine check exception. Here's
the scenario:

1. On 32-bit, the swapper_pg_dir page table is used as the initial page
table for booting a secondary CPU.

2. To make this work, swapper_pg_dir needs a direct mapping of physical
memory in it (the low mappings). By adding those low, large page (2M)
mappings (PAE kernel), we create the necessary conditions for Erratum
383 to occur.

3. Other CPUs which do not participate in the off- and onlining game may
use swapper_pg_dir while the low mappings are present (when leave_mm is
called). For all steps below, the CPU referred to is a CPU that is using
swapper_pg_dir, and not the CPU which is being onlined.

4. The presence of the low mappings in swapper_pg_dir can result
in TLB entries for addresses below __PAGE_OFFSET to be established
speculatively. These TLB entries are marked global and large.

5. When the CPU with such TLB entry switches to another page table, this
TLB entry remains because it is global.

6. The process then generates an access to an address covered by the
above TLB entry but there is a permission mismatch - the TLB entry
covers a large global page not accessible to userspace.

7. Due to this permission mismatch a new 4kb, user TLB entry gets
established. Further, Erratum 383 provides for a small window of time
where both TLB entries are present. This results in an uncorrectable
machine check exception signalling a TLB multimatch which panics the
machine.

There are two ways to fix this issue:

        1. Always do a global TLB flush when a new cr3 is loaded and the
        old page table was swapper_pg_dir. I consider this a hack hard
        to understand and with performance implications

        2. Do not use swapper_pg_dir to boot secondary CPUs like 64-bit
        does.

This patch implements solution 2. It introduces a trampoline_pg_dir
which has the same layout as swapper_pg_dir with low_mappings. This page
table is used as the initial page table of the booting CPU. Later in the
bringup process, it switches to swapper_pg_dir and does a global TLB
flush. This fixes the crashes in our test cases.

-v2: switch to swapper_pg_dir right after entering start_secondary() so
that we are able to access percpu data which might not be mapped in the
trampoline page table.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
LKML-Reference: <20100816123833.GB28147@aftab>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/head_32.S    |  8 +++++++-
 arch/x86/kernel/setup.c      |  2 ++
 arch/x86/kernel/smpboot.c    | 32 +++++++++++++-------------------
 arch/x86/kernel/trampoline.c | 18 ++++++++++++++++++
 4 files changed, 40 insertions(+), 20 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ff4c453e13f3..fa8c1b8e09fb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -334,7 +334,7 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $pa(swapper_pg_dir),%eax
+	movl pa(initial_page_table), %eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
 	orl  $X86_CR0_PG,%eax
@@ -614,6 +614,8 @@ ignore_int:
 .align 4
 ENTRY(initial_code)
 	.long i386_start_kernel
+ENTRY(initial_page_table)
+	.long pa(swapper_pg_dir)
 
 /*
  * BSS section
@@ -629,6 +631,10 @@ ENTRY(swapper_pg_dir)
 #endif
 swapper_pg_fixmap:
 	.fill 1024,4,0
+#ifdef CONFIG_X86_TRAMPOLINE
+ENTRY(trampoline_pg_dir)
+	.fill 1024,4,0
+#endif
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b008e7883207..c3a4fbb2b996 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
+	setup_trampoline_page_table();
+
 	tboot_probe();
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a5e928b0cb5f..abf4a86ffc54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
 
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
 #endif
 
 /* State of each CPU */
@@ -281,6 +280,18 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 * fragile that we want to limit the things done here to the
 	 * most necessary things.
 	 */
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Switch away from the trampoline page-table
+	 *
+	 * Do this before cpu_init() because it needs to access per-cpu
+	 * data which may not be mapped in the trampoline page-table.
+	 */
+	load_cr3(swapper_pg_dir);
+	__flush_tlb_all();
+#endif
+
 	vmi_bringup();
 	cpu_init();
 	preempt_disable();
@@ -299,12 +310,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 		legacy_pic->chip->unmask(0);
 	}
 
-#ifdef CONFIG_X86_32
-	while (low_mappings)
-		cpu_relax();
-	__flush_tlb_all();
-#endif
-
 	/* This must be done before setting cpu_online_mask */
 	set_cpu_sibling_map(raw_smp_processor_id());
 	wmb();
@@ -750,6 +755,7 @@ do_rest:
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
+	initial_page_table = __pa(&trampoline_pg_dir);
 #else
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
@@ -897,20 +903,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
-#ifdef CONFIG_X86_32
-	/* init low mem mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
-		min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-	flush_tlb_all();
-	low_mappings = 1;
-
 	err = do_boot_cpu(apicid, cpu);
 
-	zap_low_mappings(false);
-	low_mappings = 0;
-#else
-	err = do_boot_cpu(apicid, cpu);
-#endif
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index c652ef62742d..a874495b3673 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,6 +1,7 @@
 #include <linux/io.h>
 
 #include <asm/trampoline.h>
+#include <asm/pgtable.h>
 #include <asm/e820.h>
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
@@ -37,3 +38,20 @@ unsigned long __trampinit setup_trampoline(void)
 	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
 }
+
+void __init setup_trampoline_page_table(void)
+{
+#ifdef CONFIG_X86_32
+	/* Copy kernel address range */
+	clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS,
+			      KERNEL_PGD_BOUNDARY));
+
+	/* Initialize low mappings */
+	clone_pgd_range(trampoline_pg_dir,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS,
+			      KERNEL_PGD_BOUNDARY));
+#endif
+}
-- 
cgit v1.2.3


From 737480a0d525dae13306296da08029dff545bc72 Mon Sep 17 00:00:00 2001
From: KUMANO Syuhei <kumano.prog@gmail.com>
Date: Sun, 15 Aug 2010 15:18:04 +0900
Subject: kprobes/x86: Fix the return address of multiple kretprobes

Fix the return address of subsequent kretprobes when multiple
kretprobes are set on the same function.

For example:

 # cd /sys/kernel/debug/tracing
 # echo "r:event1 sys_symlink" > kprobe_events
 # echo "r:event2 sys_symlink" >> kprobe_events
 # echo 1 > events/kprobes/enable
 # ln -s /tmp/foo /tmp/bar

(without this patch)

 # cat trace
              ln-897   [000] 20404.133727: event1: (kretprobe_trampoline+0x0/0x4c <- sys_symlink)
              ln-897   [000] 20404.133747: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink)

(with this patch)

 # cat trace
              ln-740   [000] 13799.491076: event1: (system_call_fastpath+0x16/0x1b <- sys_symlink)
              ln-740   [000] 13799.491096: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink)

Signed-off-by: KUMANO Syuhei <kumano.prog@gmail.com>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
LKML-Reference: <1281853084.3254.11.camel@camp10-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1bfb6cf4dd55..770ebfb349e9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	struct hlist_node *node, *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
+		orig_ret_address = (unsigned long)ri->ret_addr;
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		if (ri->rp && ri->rp->handler) {
 			__get_cpu_var(current_kprobe) = &ri->rp->kp;
 			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+			ri->ret_addr = correct_ret_addr;
 			ri->rp->handler(ri, regs);
 			__get_cpu_var(current_kprobe) = NULL;
 		}
 
-		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
@@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			break;
 	}
 
-	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
 	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
-- 
cgit v1.2.3


From d7c53c9e822a4fefa13a0cae76f3190bfd0d5c11 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@amd64.org>
Date: Thu, 19 Aug 2010 20:10:29 +0200
Subject: x86, hotplug: Serialize CPU hotplug to avoid bringup concurrency
 issues

When testing cpu hotplug code on 32-bit we kept hitting the "CPU%d:
Stuck ??" message due to multiple cores concurrently accessing the
cpu_callin_mask, among others.

Since these codepaths are not protected from concurrent access due to
the fact that there's no sane reason for making an already complex
code unnecessarily more complex - we hit the issue only when insanely
switching cores off- and online - serialize hotplugging cores on the
sysfs level and be done with it.

[ v2.1: fix !HOTPLUG_CPU build ]

Cc: <stable@kernel.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20100819181029.GC17171@aftab>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/smpboot.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index abf4a86ffc54..8b3bfc4dd708 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -90,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
 static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
+
+/*
+ * We need this for trampoline_base protection from concurrent accesses when
+ * off- and onlining cores wildly.
+ */
+static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+        mutex_lock(&x86_cpu_hotplug_driver_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+        mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+}
+
+ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
+ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
 #else
 static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
-- 
cgit v1.2.3


From 05e407603e527f9d808dd3866d3a17c2ce4dfcc5 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Fri, 20 Aug 2010 00:46:16 +0200
Subject: x86, apic: Fix apic=debug boot crash

Fix a boot crash when apic=debug is used and the APIC is
not properly initialized.

This issue appears during Xen Dom0 kernel boot but the
fix is generic and the crash could occur on real hardware
as well.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Cc: xen-devel@lists.xensource.com
Cc: konrad.wilk@oracle.com
Cc: jeremy@goop.org
Cc: <stable@kernel.org> # .35.x, .34.x, .33.x, .32.x
LKML-Reference: <20100819224616.GB9967@router-fw-old.local.net-space.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4dc0084ec1b1..f1efebaf5510 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1728,6 +1728,8 @@ __apicdebuginit(void) print_IO_APIC(void)
 		struct irq_pin_list *entry;
 
 		cfg = desc->chip_data;
+		if (!cfg)
+			continue;
 		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
-- 
cgit v1.2.3


From cd7240c0b900eb6d690ccee088a6c9b46dae815a Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 19 Aug 2010 17:03:38 -0700
Subject: x86, tsc, sched: Recompute cyc2ns_offset's during resume from sleep
 states

TSC's get reset after suspend/resume (even on cpu's with invariant TSC
which runs at a constant rate across ACPI P-, C- and T-states). And in
some systems BIOS seem to reinit TSC to arbitrary large value (still
sync'd across cpu's) during resume.

This leads to a scenario of scheduler rq->clock (sched_clock_cpu()) less
than rq->age_stamp (introduced in 2.6.32). This leads to a big value
returned by scale_rt_power() and the resulting big group power set by the
update_group_power() is causing improper load balancing between busy and
idle cpu's after suspend/resume.

This resulted in multi-threaded workloads (like kernel-compilation) go
slower after suspend/resume cycle on core i5 laptops.

Fix this by recomputing cyc2ns_offset's during resume, so that
sched_clock() continues from the point where it was left off during
suspend.

Reported-by: Florian Pritz <flo@xssn.at>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: <stable@kernel.org> # [v2.6.32+]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1282262618.2675.24.camel@sbsiddha-MOBL3.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ce8e50239332..d632934cb638 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	local_irq_restore(flags);
 }
 
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+	if (!sched_clock_stable)
+		return;
+
+	cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with invariant TSC, TSC gets reset in some the
+ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
+ * arbitrary value (still sync'd across cpu's) during resume from such sleep
+ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
+ * that sched_clock() continues from the point where it was left off during
+ * suspend.
+ */
+void restore_sched_clock_state(void)
+{
+	unsigned long long offset;
+	unsigned long flags;
+	int cpu;
+
+	if (!sched_clock_stable)
+		return;
+
+	local_irq_save(flags);
+
+	get_cpu_var(cyc2ns_offset) = 0;
+	offset = cyc2ns_suspend - sched_clock();
+
+	for_each_possible_cpu(cpu)
+		per_cpu(cyc2ns_offset, cpu) = offset;
+
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_CPU_FREQ
 
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
-- 
cgit v1.2.3


From 51e3c1b558b31b11bf5fc66d3c6f5adacf3573f7 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Fri, 20 Aug 2010 10:36:34 +0300
Subject: x86, hwmon: Fix unsafe smp_processor_id() in thermal_throttle_add_dev

Fix BUG: using smp_processor_id() in preemptible thermal_throttle_add_dev.
We know the cpu number when calling thermal_throttle_add_dev, so we can
remove smp_processor_id call in thermal_throttle_add_dev by supplying
the cpu number as argument.

This should resolve kernel bugzilla 16615/16629.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
LKML-Reference: <20100820073634.GB5209@swordfish.minsk.epam.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Maciej Rutecki <maciej.rutecki@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index c2a8b26d4fea..d9368eeda309 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level)
 
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
-static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
+static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
+				unsigned int cpu)
 {
 	int err;
-	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
 	if (err)
@@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
 		mutex_lock(&therm_cpu_lock);
-		err = thermal_throttle_add_dev(sys_dev);
+		err = thermal_throttle_add_dev(sys_dev, cpu);
 		mutex_unlock(&therm_cpu_lock);
 		WARN_ON(err);
 		break;
@@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void)
 #endif
 	/* connect live CPUs to sysfs */
 	for_each_online_cpu(cpu) {
-		err = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+		err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
 		WARN_ON(err);
 	}
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v1.2.3


From b7d460897739e02f186425b7276e3fdb1595cea7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 24 Aug 2010 22:44:12 -0700
Subject: x86, mm: Fix CONFIG_VMSPLIT_1G and 2G_OPT trampoline

rc2 kernel crashes when booting second cpu on this CONFIG_VMSPLIT_2G_OPT
laptop: whereas cloning from kernel to low mappings pgd range does need
to limit by both KERNEL_PGD_PTRS and KERNEL_PGD_BOUNDARY, cloning kernel
pgd range itself must not be limited by the smaller KERNEL_PGD_BOUNDARY.

Signed-off-by: Hugh Dickins <hughd@google.com>
LKML-Reference: <alpine.LSU.2.00.1008242235120.2515@sister.anvils>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/trampoline.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a874495b3673..e2a595257390 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void)
 	/* Copy kernel address range */
 	clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
-			min_t(unsigned long, KERNEL_PGD_PTRS,
-			      KERNEL_PGD_BOUNDARY));
+			KERNEL_PGD_PTRS);
 
 	/* Initialize low mappings */
 	clone_pgd_range(trampoline_pg_dir,
-- 
cgit v1.2.3


From 8d330919927ea31fa083b5a80084dc991da813a0 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Wed, 25 Aug 2010 21:06:32 +0000
Subject: perf, x86, Pentium4: Clear the P4_CCCR_FORCE_OVF flag

If on Pentium4 CPUs the FORCE_OVF flag is set then an NMI happens
on every event, which can generate a flood of NMIs. Clear it.

Reported-by: Vince Weaver <vweaver1@eecs.utk.edu>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index febb12cea795..7e578e9cc58b 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event)
 		event->hw.config |= event->attr.config &
 			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
 			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
+
+		event->hw.config &= ~P4_CCCR_FORCE_OVF;
 	}
 
 	rc = x86_setup_perfctr(event);
-- 
cgit v1.2.3


From 2e556b5b320838fde98480a1f6cf220a5af200fc Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Thu, 2 Sep 2010 15:07:47 -0400
Subject: perf, x86: Fix accidentally ack'ing a second event on intel perf
 counter

During testing of a patch to stop having the perf subsytem
swallow nmis, it was uncovered that Nehalem boxes were randomly
getting unknown nmis when using the perf tool.

Moving the ack'ing of the PMI closer to when we get the status
allows the hardware to properly re-set the PMU bit signaling
another PMI was triggered during the processing of the first
PMI.  This allows the new logic for dealing with the
shortcomings of multiple PMIs to handle the extra NMI by
'eat'ing it later.

Now one can wonder why are we getting a second PMI when we
disable all the PMUs in the begining of the NMI handler to
prevent such a case, for that I do not know.  But I know the fix
below helps deal with this quirk.

Tested on multiple Nehalems where the problem was occuring.
With the patch, the code now loops a second time to handle the
second PMI (whereas before it was not).

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: robert.richter@amd.com
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d8d86d014008..1297bf15cb88 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -712,7 +712,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
 	int bit, loops;
-	u64 ack, status;
+	u64 status;
 
 	perf_sample_data_init(&data, 0);
 
@@ -728,6 +728,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	loops = 0;
 again:
+	intel_pmu_ack_status(status);
 	if (++loops > 100) {
 		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
 		perf_event_print_debug();
@@ -736,7 +737,6 @@ again:
 	}
 
 	inc_irq_stat(apic_perf_irqs);
-	ack = status;
 
 	intel_pmu_lbr_read();
 
@@ -761,8 +761,6 @@ again:
 			x86_pmu_stop(event);
 	}
 
-	intel_pmu_ack_status(ack);
-
 	/*
 	 * Repeat if there is more work to be done:
 	 */
-- 
cgit v1.2.3


From de725dec9de7a7541996176d59cf8542365b8b0e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Sep 2010 15:07:49 -0400
Subject: perf, x86: Fix handle_irq return values

Now that we rely on the number of handled overflows, ensure all
handle_irq implementations actually return the right number.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: robert.richter@amd.com
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-4-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 9 +++++++--
 arch/x86/kernel/cpu/perf_event_p4.c    | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1297bf15cb88..ee05c90012d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,6 +713,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	struct cpu_hw_events *cpuc;
 	int bit, loops;
 	u64 status;
+	int handled = 0;
 
 	perf_sample_data_init(&data, 0);
 
@@ -743,12 +744,16 @@ again:
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
-	if (__test_and_clear_bit(62, (unsigned long *)&status))
+	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+		handled++;
 		x86_pmu.drain_pebs(regs);
+	}
 
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
+		handled++;
+
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
@@ -770,7 +775,7 @@ again:
 
 done:
 	intel_pmu_enable_all(0);
-	return 1;
+	return handled;
 }
 
 static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 7e578e9cc58b..b560db3305be 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -692,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		inc_irq_stat(apic_perf_irqs);
 	}
 
-	return handled > 0;
+	return handled;
 }
 
 /*
-- 
cgit v1.2.3


From 4177c42a6301a34c20038ec2771a33dcc30bb338 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 2 Sep 2010 15:07:48 -0400
Subject: perf, x86: Try to handle unknown nmis with an enabled PMU

When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.

The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.

This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.

This is a debug log:

 cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
 cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
 cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
 cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
 cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
 cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
 cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
 cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
 cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
 cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
 cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
 cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
 cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
 cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
 cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
 cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
 Uhhuh. NMI received for unknown reason 00 on CPU 6.
 Do you have a strange power saving mode enabled?
 Dazed and confused, but trying to continue

Deltas:

 nmi #32334 340186
 nmi #32335 1327704
 nmi #32336 1819      <<<< back-to-back nmi [1]
 nmi #32337 85961
 nmi #32338 284507
 nmi #32339 1578809
 nmi #32340 217616
 nmi #32341 1798      <<<< back-to-back nmi [2]
 nmi #32342 240913
 nmi #32343 1512809
 nmi #32344 116672
 nmi #32345 412453
 nmi #32346 1462095   <<<< 1st nmi (standard) handling 2 counters
 nmi #32347 2046      <<<< 2nd nmi (back-to-back) handling one
 counter nmi #32348 1773      <<<< 3rd nmi (back-to-back)
 handling no counter! [3]

For  back-to-back nmi detection there are the following rules:

The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).

There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 59 +++++++++++++++++++++++++++++++---------
 1 file changed, 46 insertions(+), 13 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02d..3efdf2870a35 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		/*
 		 * event overflow
 		 */
-		handled		= 1;
+		handled++;
 		data.period	= event->hw.last_period;
 
 		if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
+struct pmu_nmi_state {
+	unsigned int	marked;
+	int		handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
 static int __kprobes
 perf_event_nmi_handler(struct notifier_block *self,
 			 unsigned long cmd, void *__args)
 {
 	struct die_args *args = __args;
-	struct pt_regs *regs;
+	unsigned int this_nmi;
+	int handled;
 
 	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
 	case DIE_NMI:
 	case DIE_NMI_IPI:
 		break;
-
+	case DIE_NMIUNKNOWN:
+		this_nmi = percpu_read(irq_stat.__nmi_count);
+		if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+			/* let the kernel handle the unknown nmi */
+			return NOTIFY_DONE;
+		/*
+		 * This one is a PMU back-to-back nmi. Two events
+		 * trigger 'simultaneously' raising two back-to-back
+		 * NMIs. If the first NMI handles both, the latter
+		 * will be empty and daze the CPU. So, we drop it to
+		 * avoid false-positive 'unknown nmi' messages.
+		 */
+		return NOTIFY_STOP;
 	default:
 		return NOTIFY_DONE;
 	}
 
-	regs = args->regs;
-
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	/*
-	 * Can't rely on the handled return value to say it was our NMI, two
-	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
-	 *
-	 * If the first NMI handles both, the latter will be empty and daze
-	 * the CPU.
-	 */
-	x86_pmu.handle_irq(regs);
+
+	handled = x86_pmu.handle_irq(args->regs);
+	if (!handled)
+		return NOTIFY_DONE;
+
+	this_nmi = percpu_read(irq_stat.__nmi_count);
+	if ((handled > 1) ||
+		/* the next nmi could be a back-to-back nmi */
+	    ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+	     (__get_cpu_var(pmu_nmi).handled > 1))) {
+		/*
+		 * We could have two subsequent back-to-back nmis: The
+		 * first handles more than one counter, the 2nd
+		 * handles only one counter and the 3rd handles no
+		 * counter.
+		 *
+		 * This is the 2nd nmi because the previous was
+		 * handling more than one counter. We will mark the
+		 * next (3rd) and then drop it if unhandled.
+		 */
+		__get_cpu_var(pmu_nmi).marked	= this_nmi + 1;
+		__get_cpu_var(pmu_nmi).handled	= handled;
+	}
 
 	return NOTIFY_STOP;
 }
-- 
cgit v1.2.3


From 1389298f7d14f179c2fa4b4b343079bd56082c70 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 27 Aug 2010 11:20:07 +0200
Subject: x86, mcheck: Avoid duplicate sysfs links/files for thresholding banks

kobject_add_internal failed for threshold_bank2 with -EEXIST,
don't try to register things with the same name in the same
directory:

  Pid: 1, comm: swapper Tainted: G        W  2.6.31 #1
  Call Trace:
  [<ffffffff81161b07>] ? kobject_add_internal+0x156/0x180
  [<ffffffff81161cc0>] ? kobject_add+0x66/0x6b
  [<ffffffff81161793>] ? kobject_init+0x42/0x82
  [<ffffffff81161cf9>] ? kobject_create_and_add+0x34/0x63
  [<ffffffff81393963>] ? threshold_create_bank+0x14f/0x259
  [<ffffffff8139310a>] ? mce_create_device+0x8d/0x1b8
  [<ffffffff81646497>] ? threshold_init_device+0x3f/0x80
  [<ffffffff81646458>] ? threshold_init_device+0x0/0x80
  [<ffffffff81009050>] ? do_one_initcall+0x4f/0x143
  [<ffffffff816413a0>] ? kernel_init+0x14c/0x1a2
  [<ffffffff8100c8da>] ? child_rip+0xa/0x20
  [<ffffffff81641254>] ? kernel_init+0x0/0x1a2
  [<ffffffff8100c8d0>] ? child_rip+0x0/0x20
  kobject_create_and_add: kobject_add error: -17

(Probably the for_each_cpu loop should be entirely removed.)

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
LKML-Reference: <20100827092006.GB5348@loge.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 224392d8fe8c..5e975298fa81 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		err = -ENOMEM;
 		goto out;
 	}
-	if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
 		kfree(b);
 		err = -ENOMEM;
 		goto out;
@@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 #ifndef CONFIG_SMP
 	cpumask_setall(b->cpus);
 #else
-	cpumask_copy(b->cpus, c->llc_shared_map);
+	cpumask_set_cpu(cpu, b->cpus);
 #endif
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
-- 
cgit v1.2.3


From 36ac4b987bea9a95217e1af552252f275ca7fc44 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Fri, 10 Sep 2010 10:08:08 -0500
Subject: x86, UV: Fix initialization of max_pnode

Fix calculation of "max_pnode" for systems where the the highest
blade has neither cpus or memory. (And, yes, although rare this
does occur).

Signed-off-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <20100910150808.GA19802@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7b598b84c902..f744f54cb248 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -698,9 +698,11 @@ void __init uv_system_init(void)
 		for (j = 0; j < 64; j++) {
 			if (!test_bit(j, &present))
 				continue;
-			uv_blade_info[blade].pnode = (i * 64 + j);
+			pnode = (i * 64 + j);
+			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
 	}
@@ -738,7 +740,6 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
-		max_pnode = max(pnode, max_pnode);
 	}
 
 	/* Add blade/pnode info for nodes without cpus */
@@ -750,7 +751,6 @@ void __init uv_system_init(void)
 		pnode = (paddr >> m_val) & pnode_mask;
 		blade = boot_pnode_to_blade(pnode);
 		uv_node_to_blade[nid] = blade;
-		max_pnode = max(pnode, max_pnode);
 	}
 
 	map_gru_high(max_pnode);
-- 
cgit v1.2.3


From 5ee5e97ee9bca919af11c562beeaf61741ad33f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 10 Sep 2010 22:32:53 +0200
Subject: x86, tsc: Fix a preemption leak in restore_sched_clock_state()

A real life genuine preemption leak..

Reported-and-tested-by: Jeff Chua <jeff.chua.linux@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d632934cb638..26a863a9c2a8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -655,7 +655,7 @@ void restore_sched_clock_state(void)
 
 	local_irq_save(flags);
 
-	get_cpu_var(cyc2ns_offset) = 0;
+	__get_cpu_var(cyc2ns_offset) = 0;
 	offset = cyc2ns_suspend - sched_clock();
 
 	for_each_possible_cpu(cpu)
-- 
cgit v1.2.3


From 54ff7e595d763d894104d421b103a89f7becf47c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Sep 2010 22:10:21 +0200
Subject: x86: hpet: Work around hardware stupidity

This more or less reverts commits 08be979 (x86: Force HPET
readback_cmp for all ATI chipsets) and 30a564be (x86, hpet: Restrict
read back to affected ATI chipsets) to the status of commit 8da854c
(x86, hpet: Erratum workaround for read after write of HPET
comparator).

The delta to commit 8da854c is mostly comments and the change from
WARN_ONCE to printk_once as we know the call path of this function
already.

This needs really in depth explanation:

First of all the HPET design is a complete failure. Having a counter
compare register which generates an interrupt on matching values
forces the software to do at least one superfluous readback of the
counter register.

While it is nice in theory to program "absolute" time events it is
practically useless because the timer runs at some absurd frequency
which can never be matched to real world units. So we are forced to
calculate a relative delta and this forces a readout of the actual
counter value, adding the delta and programming the compare
register. When the delta is small enough we run into the danger that
we program a compare value which is already in the past. Due to the
compare for equal nature of HPET we need to read back the counter
value after writing the compare rehgister (btw. this is necessary for
absolute timeouts as well) to make sure that we did not miss the timer
event. We try to work around that by setting the minimum delta to a
value which is larger than the theoretical time which elapses between
the counter readout and the compare register write, but that's only
true in theory. A NMI or SMI which hits between the readout and the
write can easily push us beyond that limit. This would result in
waiting for the next HPET timer interrupt until the 32bit wraparound
of the counter happens which takes about 306 seconds.

So we designed the next event function to look like:

   match = read_cnt() + delta;
   write_compare_ref(match);
   return read_cnt() < match ? 0 : -ETIME;

At some point we got into trouble with certain ATI chipsets. Even the
above "safe" procedure failed. The reason was that the write to the
compare register was delayed probably for performance reasons. The
theory was that they wanted to avoid the synchronization of the write
with the HPET clock, which is understandable. So the write does not
hit the compare register directly instead it goes to some intermediate
register which is copied to the real compare register in sync with the
HPET clock. That opens another window for hitting the dreaded "wait
for a wraparound" problem.

To work around that "optimization" we added a read back of the compare
register which either enforced the update of the just written value or
just delayed the readout of the counter enough to avoid the issue. We
unfortunately never got any affirmative info from ATI/AMD about this.

One thing is sure, that we nuked the performance "optimization" that
way completely and I'm pretty sure that the result is worse than
before some HW folks came up with those.

Just for paranoia reasons I added a check whether the read back
compare register value was the same as the value we wrote right
before. That paranoia check triggered a couple of years after it was
added on an Intel ICH9 chipset. Venki added a workaround (commit
8da854c) which was reading the compare register twice when the first
check failed. We considered this to be a penalty in general and
restricted the readback (thus the wasted CPU cycles) to the known to
be affected ATI chipsets.

This turned out to be a utterly wrong decision. 2.6.35 testers
experienced massive problems and finally one of them bisected it down
to commit 30a564be which spured some further investigation.

Finally we got confirmation that the write to the compare register can
be delayed by up to two HPET clock cycles which explains the problems
nicely. All we can do about this is to go back to Venki's initial
workaround in a slightly modified version.

Just for the record I need to say, that all of this could have been
avoided if hardware designers and of course the HPET committee would
have thought about the consequences for a split second. It's out of my
comprehension why designing a working timer is so hard. There are two
ways to achieve it:

 1) Use a counter wrap around aware compare_reg <= counter_reg
    implementation instead of the easy compare_reg == counter_reg

    Downsides:

	- It needs more silicon.

	- It needs a readout of the counter to apply a relative
	  timeout. This is necessary as the counter does not run in
	  any useful (and adjustable) frequency and there is no
	  guarantee that the counter which is used for timer events is
	  the same which is used for reading the actual time (and
	  therefor for calculating the delta)

    Upsides:

	- None

  2) Use a simple down counter for relative timer events

    Downsides:

	- Absolute timeouts are not possible, which is not a problem
	  at all in the context of an OS and the expected
	  max. latencies/jitter (also see Downsides of #1)

   Upsides:

	- It needs less or equal silicon.

	- It works ALWAYS

	- It is way faster than a compare register based solution (One
	  write versus one write plus at least one and up to four
	  reads)

I would not be so grumpy about all of this, if I would not have been
ignored for many years when pointing out these flaws to various
hardware folks. I really hate timers (at least those which seem to be
designed by janitors).

Though finally we got a reasonable explanation plus a solution and I
want to thank all the folks involved in chasing it down and providing
valuable input to this.

Bisected-by: Nix <nix@esperi.org.uk>
Reported-by: Artur Skawina <art.08.09@gmail.com>
Reported-by: Damien Wyart <damien.wyart@free.fr>
Reported-by: John Drescher <drescherjm@gmail.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: stable@kernel.org
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/early-quirks.c | 18 ------------------
 arch/x86/kernel/hpet.c         | 31 +++++++++++++++++--------------
 2 files changed, 17 insertions(+), 32 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index e5cc7e82e60d..ebdb85cf2686 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,7 +18,6 @@
 #include <asm/apic.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
-#include <asm/hpet.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
@@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 }
 #endif
 
-/*
- * Force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
- *
- * We do this on all SMBUS incarnations for now until we have more
- * information about the affected chipsets.
- */
-static void __init ati_hpet_bugs(int num, int slot, int func)
-{
-#ifdef CONFIG_HPET_TIMER
-	hpet_readback_cmp = 1;
-#endif
-}
-
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
-	{ PCI_VENDOR_ID_ATI, PCI_ANY_ID,
-	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
 	{}
 };
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 351f9c0fea1f..410fdb3f1939 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -35,7 +35,6 @@
 unsigned long				hpet_address;
 u8					hpet_blockid; /* OS timer block num */
 u8					hpet_msi_disable;
-u8					hpet_readback_cmp;
 
 #ifdef CONFIG_PCI_MSI
 static unsigned long			hpet_num_timers;
@@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta,
 	 * at that point and we would wait for the next hpet interrupt
 	 * forever. We found out that reading the CMP register back
 	 * forces the transfer so we can rely on the comparison with
-	 * the counter register below.
+	 * the counter register below. If the read back from the
+	 * compare register does not match the value we programmed
+	 * then we might have a real hardware problem. We can not do
+	 * much about it here, but at least alert the user/admin with
+	 * a prominent warning.
 	 *
-	 * That works fine on those ATI chipsets, but on newer Intel
-	 * chipsets (ICH9...) this triggers due to an erratum: Reading
-	 * the comparator immediately following a write is returning
-	 * the old value.
+	 * An erratum on some chipsets (ICH9,..), results in
+	 * comparator read immediately following a write returning old
+	 * value. Workaround for this is to read this value second
+	 * time, when first read returns old value.
 	 *
-	 * We restrict the read back to the affected ATI chipsets (set
-	 * by quirks) and also run it with hpet=verbose for debugging
-	 * purposes.
+	 * In fact the write to the comparator register is delayed up
+	 * to two HPET cycles so the workaround we tried to restrict
+	 * the readback to those known to be borked ATI chipsets
+	 * failed miserably. So we give up on optimizations forever
+	 * and penalize all HPET incarnations unconditionally.
 	 */
-	if (hpet_readback_cmp || hpet_verbose) {
-		u32 cmp = hpet_readl(HPET_Tn_CMP(timer));
-
-		if (cmp != cnt)
+	if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+		if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
 			printk_once(KERN_WARNING
-			    "hpet: compare register read back failed.\n");
+				"hpet: compare register read back failed.\n");
 	}
 
 	return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
-- 
cgit v1.2.3


From 89e45aac42d40426c97e6901811309bf49c4993f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 17 Sep 2010 03:24:13 +0200
Subject: x86: Fix instruction breakpoint encoding

Lengths and types of breakpoints are encoded in a half byte
into CPU registers. However when we extract these values
and store them, we add a high half byte part to them: 0x40 to the
length and 0x80 to the type.
When that gets reloaded to the CPU registers, the high part
is masked.

While making the instruction breakpoints available for perf,
I zapped that high part on instruction breakpoint encoding
and that broke the arch -> generic translation used by ptrace
instruction breakpoints. Writing dr7 to set an inst breakpoint
was then failing.

There is no apparent reason for these high parts so we could get
rid of them altogether. That's an invasive change though so let's
do that later and for now fix the problem by restoring that inst
breakpoint high part encoding in this sole patch.

Reported-by: Kelvie Wong <kelvie@ieee.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/x86/kernel/hw_breakpoint.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a474ec37c32f..ff15c9dcc25d 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
 int arch_bp_generic_fields(int x86_len, int x86_type,
 			   int *gen_len, int *gen_type)
 {
-	/* Len */
-	switch (x86_len) {
-	case X86_BREAKPOINT_LEN_X:
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		if (x86_len != X86_BREAKPOINT_LEN_X)
+			return -EINVAL;
+
+		*gen_type = HW_BREAKPOINT_X;
 		*gen_len = sizeof(long);
+		return 0;
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
 		break;
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (x86_len) {
 	case X86_BREAKPOINT_LEN_1:
 		*gen_len = HW_BREAKPOINT_LEN_1;
 		break;
@@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
 		return -EINVAL;
 	}
 
-	/* Type */
-	switch (x86_type) {
-	case X86_BREAKPOINT_EXECUTE:
-		*gen_type = HW_BREAKPOINT_X;
-		break;
-	case X86_BREAKPOINT_WRITE:
-		*gen_type = HW_BREAKPOINT_W;
-		break;
-	case X86_BREAKPOINT_RW:
-		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	return 0;
 }
 
@@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	ret = -EINVAL;
 
 	switch (info->len) {
-	case X86_BREAKPOINT_LEN_X:
-		align = sizeof(long) -1;
-		break;
 	case X86_BREAKPOINT_LEN_1:
 		align = 0;
 		break;
-- 
cgit v1.2.3