ARM: sched_clock: Load cycle count after epoch stabilizes

There is a small race between when the cycle count is read from
the hardware and when the epoch stabilizes. Consider this
scenario:

 CPU0                           CPU1
 ----                           ----
 cyc = read_sched_clock()
 cyc_to_sched_clock()
                                 update_sched_clock()
                                  ...
                                  cd.epoch_cyc = cyc;
  epoch_cyc = cd.epoch_cyc;
  ...
  epoch_ns + cyc_to_ns((cyc - epoch_cyc)

The cyc on cpu0 was read before the epoch changed. But we
calculate the nanoseconds based on the new epoch by subtracting
the new epoch from the old cycle count. Since epoch is most likely
larger than the old cycle count we calculate a large number that
will be converted to nanoseconds and added to epoch_ns, causing
time to jump forward too much.

Fix this problem by reading the hardware after the epoch has
stabilized.

Bug 1399318

Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
(cherry picked from commit 336ae1180df5f69b9e0fb6561bec01c5f64361cf)
Signed-off-by: Ajay Nandakumar <anandakumarm@nvidia.com>
Change-Id: I50aedb0eb050206de4ab20505e84a561ec1ca0fb
author: Stephen Boyd <sboyd@codeaurora.org> 2013-06-17 15:40:58 -0700
committer: Ajay Nandakumar <anandakumarm@nvidia.com> 2014-01-13 12:03:37 +0530
commit: fdbaed23b16dd817ceb54fe4458193bf920d1c06 (patch)
tree: aa296144aaf8fc577adfa4d05d436c5641388cc4 /kernel
parent: 6220dca122e5d31ff3a167d14680e424664b8c7a (diff)
1 files changed, 8 insertions, 11 deletions
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index aad1ae6077ef..a326f27d7f09 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -49,10 +49,14 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 	return (cyc * mult) >> shift;
 }
 
-static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
+static unsigned long long notrace sched_clock_32(void)
 {
 	u64 epoch_ns;
 	u32 epoch_cyc;
+	u32 cyc;
+
+	if (cd.suspended)
+		return cd.epoch_ns;
 
 	/*
 	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
@@ -68,7 +72,9 @@ static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
 		smp_rmb();
 	} while (epoch_cyc != cd.epoch_cyc_copy);
 
-	return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift);
+	cyc = read_sched_clock();
+	cyc = (cyc - epoch_cyc) & sched_clock_mask;
+	return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
 }
 
 /*
@@ -160,19 +166,10 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
-static unsigned long long notrace sched_clock_32(void)
-{
-	u32 cyc = read_sched_clock();
-	return cyc_to_sched_clock(cyc, sched_clock_mask);
-}
-
 unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
 
 unsigned long long notrace sched_clock(void)
 {
-	if (cd.suspended)
-		return cd.epoch_ns;
-
 	return sched_clock_func();
 }
author	Stephen Boyd <sboyd@codeaurora.org>	2013-06-17 15:40:58 -0700
committer	Ajay Nandakumar <anandakumarm@nvidia.com>	2014-01-13 12:03:37 +0530
commit	fdbaed23b16dd817ceb54fe4458193bf920d1c06 (patch)
tree	aa296144aaf8fc577adfa4d05d436c5641388cc4 /kernel
parent	6220dca122e5d31ff3a167d14680e424664b8c7a (diff)