diff options
author | Paul Mackerras <paulus@samba.org> | 2008-10-27 23:56:03 +0000 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2008-11-06 09:49:22 +1100 |
commit | 597bc5c00b666fe123abb0af64f6e86f7ab72a90 (patch) | |
tree | f6be6e6f07fb1caff3b670a7ac3df74a621ae364 /arch/powerpc/kernel/vdso32 | |
parent | c73049f6aa58ac1d1a9ca8cb2b415ef97240b2d3 (diff) |
powerpc: Improve resolution of VDSO clock_gettime
Currently the clock_gettime implementation in the VDSO produces a
result with microsecond resolution for the cases that are handled
without a system call, i.e. CLOCK_REALTIME and CLOCK_MONOTONIC. The
nanoseconds field of the result is obtained by computing a
microseconds value and multiplying by 1000.
This changes the code in the VDSO to do the computation for
clock_gettime with nanosecond resolution. That means that the
resolution of the result will ultimately depend on the timebase
frequency.
Because the timestamp in the VDSO datapage (stamp_xsec, the real time
corresponding to the timebase count in tb_orig_stamp) is in units of
2^-20 seconds, it doesn't have sufficient resolution for computing a
result with nanosecond resolution. Therefore this adds a copy of
xtime to the VDSO datapage and updates it in update_gtod() along with
the other time-related fields.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/kernel/vdso32')
-rw-r--r-- | arch/powerpc/kernel/vdso32/gettimeofday.S | 208 |
1 files changed, 125 insertions, 83 deletions
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 72ca26df457e..ee038d4bf252 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -16,6 +16,13 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> +/* Offset for the low 32-bit part of a field of long type */ +#ifdef CONFIG_PPC64 +#define LOPART 4 +#else +#define LOPART 0 +#endif + .text /* * Exact prototype of gettimeofday @@ -90,101 +97,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mflr r12 /* r12 saves lr */ .cfi_register lr,r12 - mr r10,r3 /* r10 saves id */ mr r11,r4 /* r11 saves tp */ bl __get_datapage@local /* get data page */ mr r9,r3 /* datapage ptr in r9 */ - beq cr1,50f /* if monotonic -> jump there */ - - /* - * CLOCK_REALTIME - */ - - bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 98f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r5,r4,12,20,31 - rlwimi r5,r3,12,0,19 - stw r5,TSPC32_TV_SEC(r11) - /* get remaining xsec and convert to nsec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication, then we multiply by 1000 - */ - rlwinm r5,r4,12,0,19 - lis r6,1000000@h - ori r6,r6,1000000@l - mulhwu r5,r5,r6 - mulli r5,r5,1000 - stw r5,TSPC32_TV_NSEC(r11) - mtlr r12 - crclr cr0*4+so - li r3,0 - blr +50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ + bne cr1,80f /* not monotonic -> all done */ /* * CLOCK_MONOTONIC */ -50: bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 98f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r6,r4,12,20,31 - rlwimi r6,r3,12,0,19 - - /* get remaining xsec and convert to nsec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication, then we multiply by 1000 - */ - rlwinm r7,r4,12,0,19 - lis r5,1000000@h - ori r5,r5,1000000@l - mulhwu r7,r7,r5 - mulli r7,r7,1000 - /* now we must fixup using wall to monotonic. We need to snapshot * that value and do the counter trick again. Fortunately, we still * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5 - * can be used + * At this point, r3,r4 contain our sec/nsec values, r5 and r6 + * can be used, r7 contains NSEC_PER_SEC. */ - lwz r3,WTOM_CLOCK_SEC(r9) - lwz r4,WTOM_CLOCK_NSEC(r9) + lwz r5,WTOM_CLOCK_SEC(r9) + lwz r6,WTOM_CLOCK_NSEC(r9) - /* We now have our result in r3,r4. We create a fake dependency - * on that result and re-check the counter + /* We now have our offset in r5,r6. We create a fake dependency + * on that value and re-check the counter */ - or r5,r4,r3 - xor r0,r5,r5 + or r0,r6,r5 + xor r0,r0,r0 add r9,r9,r0 -#ifdef CONFIG_PPC64 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) -#else - lwz r0,(CFG_TB_UPDATE_COUNT)(r9) -#endif + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) cmpl cr0,r8,r0 /* check if updated */ bne- 50b - /* Calculate and store result. Note that this mimmics the C code, + /* Calculate and store result. Note that this mimics the C code, * which may cause funny results if nsec goes negative... is that * possible at all ? */ - add r3,r3,r6 - add r4,r4,r7 - lis r5,NSEC_PER_SEC@h - ori r5,r5,NSEC_PER_SEC@l - cmpl cr0,r4,r5 - cmpli cr1,r4,0 + add r3,r3,r5 + add r4,r4,r6 + cmpw cr0,r4,r7 + cmpwi cr1,r4,0 blt 1f - subf r4,r5,r4 + subf r4,r7,r4 addi r3,r3,1 -1: bge cr1,1f +1: bge cr1,80f addi r3,r3,-1 - add r4,r4,r5 -1: stw r3,TSPC32_TV_SEC(r11) + add r4,r4,r7 + +80: stw r3,TSPC32_TV_SEC(r11) stw r4,TSPC32_TV_NSEC(r11) mtlr r12 @@ -195,10 +154,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) /* * syscall fallback */ -98: - mtlr r12 - mr r3,r10 - mr r4,r11 99: li r0,__NR_clock_gettime sc @@ -254,11 +209,7 @@ __do_get_xsec: /* Check for update count & load values. We use the low * order 32 bits of the update count */ -#ifdef CONFIG_PPC64 -1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) -#else -1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9) -#endif +1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) andi. r0,r8,1 /* pending update ? loop */ bne- 1b xor r0,r8,r8 /* create dependency */ @@ -305,11 +256,7 @@ __do_get_xsec: or r6,r4,r3 xor r0,r6,r6 add r9,r9,r0 -#ifdef CONFIG_PPC64 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) -#else - lwz r0,(CFG_TB_UPDATE_COUNT)(r9) -#endif + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) cmpl cr0,r8,r0 /* check if updated */ bne- 1b @@ -322,3 +269,98 @@ __do_get_xsec: */ 3: blr .cfi_endproc + +/* + * This is the core of clock_gettime(), it returns the current + * time in seconds and nanoseconds in r3 and r4. + * It expects the datapage ptr in r9 and doesn't clobber it. + * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7. + * On return, r8 contains the counter value that can be reused. + * This clobbers cr0 but not any other cr field. + */ +__do_get_tspec: + .cfi_startproc + /* Check for update count & load values. We use the low + * order 32 bits of the update count + */ +1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r9,r9,r0 + + /* Load orig stamp (offset to TB) */ + lwz r5,CFG_TB_ORIG_STAMP(r9) + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) + + /* Get a stable TB value */ +2: mftbu r3 + mftbl r4 + mftbu r0 + cmpl cr0,r3,r0 + bne- 2b + + /* Subtract tb orig stamp and shift left 12 bits. + */ + subfc r7,r6,r4 + subfe r0,r5,r3 + slwi r0,r0,12 + rlwimi. r0,r7,12,20,31 + slwi r7,r7,12 + + /* Load scale factor & do multiplication */ + lwz r5,CFG_TB_TO_XS(r9) /* load values */ + lwz r6,(CFG_TB_TO_XS+4)(r9) + mulhwu r3,r7,r6 + mullw r10,r7,r5 + mulhwu r4,r7,r5 + addc r10,r3,r10 + li r3,0 + + beq+ 4f /* skip high part computation if 0 */ + mulhwu r3,r0,r5 + mullw r7,r0,r5 + mulhwu r5,r0,r6 + mullw r6,r0,r6 + adde r4,r4,r7 + addze r3,r3 + addc r4,r4,r5 + addze r3,r3 + addc r10,r10,r6 + +4: addze r4,r4 /* add in carry */ + lis r7,NSEC_PER_SEC@h + ori r7,r7,NSEC_PER_SEC@l + mulhwu r4,r4,r7 /* convert to nanoseconds */ + + /* At this point, we have seconds & nanoseconds since the xtime + * stamp in r3+CA and r4. Load & add the xtime stamp. + */ +#ifdef CONFIG_PPC64 + lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9) + lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9) +#else + lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9) + lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9) +#endif + add r4,r4,r6 + adde r3,r3,r5 + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + or r6,r4,r3 + xor r0,r6,r6 + add r9,r9,r0 + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) + cmpl cr0,r8,r0 /* check if updated */ + bne- 1b + + /* check for nanosecond overflow and adjust if necessary */ + cmpw r4,r7 + bltlr /* all done if no overflow */ + subf r4,r7,r4 /* adjust if overflow */ + addi r3,r3,1 + + blr + .cfi_endproc |