/*
 * This file contains the light-weight system call handlers (fsyscall-handlers).
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 * 	David Mosberger-Tang
 *
 * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
 * 18-Feb-03 louisk	Implement fsys_gettimeofday().
 * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
 *			probably broke it along the way... ;-)
 * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
 *                      it capable of using memory based clocks without falling back to C code.
 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
 *
 */

/*
 * NOTE(review): the operands of the following eight #include directives are
 * missing from this copy of the file (they look like <...> header names that
 * were stripped during extraction).  Restore them from version control before
 * building; they are kept here exactly as found.
 */
#include
#include
#include
#include
#include
#include
#include
#include

#include "entry.h"
#include "paravirt_inst.h"

/*
 * See Documentation/ia64/fsys.txt for details on fsyscalls.
 *
 * On entry to an fsyscall handler:
 *   r10	= 0 (i.e., defaults to "successful syscall return")
 *   r11	= saved ar.pfs (a user-level value)
 *   r15	= system call number
 *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
 *   r32-r39	= system call arguments
 *   b6		= return address (a user-level value)
 *   ar.pfs	= previous frame-state (a user-level value)
 *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
 *   all other registers may contain values passed in from user-mode
 *
 * On return from an fsyscall handler:
 *   r11	= saved ar.pfs (as passed into the fsyscall handler)
 *   r15	= system call number (as passed into the fsyscall handler)
 *   r32-r39	= system call arguments (as passed into the fsyscall handler)
 *   b6		= return address (as passed into the fsyscall handler)
 *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
 */

/*
 * fsys_ni_syscall: handler for slot 1024 of the fsyscall table.
 * Always fails: returns r8 = ENOSYS with r10 = -1.
 */
ENTRY(fsys_ni_syscall)
	.prologue
	.altrp b6
	.body
	mov r8=ENOSYS
	mov r10=-1
	FSYS_RETURN
END(fsys_ni_syscall)

/*
 * fsys_getpid: returns in r8 the pid number of current->group_leader as seen
 * at that pid's own level (pid->numbers[pid->level].nr).
 * Falls back to the heavy-weight syscall if any TIF_ALLWORK_MASK flag is set.
 */
ENTRY(fsys_getpid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;
	ld4 r9=[r9]
	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
	;;
	and r9=TIF_ALLWORK_MASK,r9
	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
	;;
	add r8=IA64_PID_LEVEL_OFFSET,r17
	;;
	ld4 r8=[r8]				// r8 = pid->level
	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
	;;
	shl r8=r8,IA64_UPID_SHIFT
	;;
	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
	;;
	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
	;;
	mov r17=0				// do not hand the kernel pointer back to user mode
	;;
	cmp.ne p8,p0=0,r9
(p8)	br.spnt.many fsys_fallback_syscall
	FSYS_RETURN
END(fsys_getpid)

/*
 * fsys_getppid: returns in r8 current->group_leader->real_parent->tgid.
 * On SMP the real_parent pointer is re-read after the tgid load and the
 * sequence is retried if it changed in between.
 * Falls back to the heavy-weight syscall if any TIF_ALLWORK_MASK flag is set.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0			// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
#else
	/*
	 * p8 was set above when work is pending; honour it on UP as well so
	 * that this handler behaves like the SMP path and the other handlers
	 * in this file.
	 */
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	mov r19=0			// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)

/*
 * fsys_set_tid_address: r32 = new clear_child_tid value.
 * Stores r32 (or -1 if r32 is NaT) into current->clear_child_tid and returns
 * in r8 the pid number of current at the pid's own level.
 * Falls back to the heavy-weight syscall if any TIF_ALLWORK_MASK flag is set
 * (the store has already been issued at that point).
 */
ENTRY(fsys_set_tid_address)
	.prologue
	.altrp b6
	.body
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
	;;
	ld4 r9=[r9]
	tnat.z p6,p7=r32		// check argument register for being NaT
	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
	;;
	and r9=TIF_ALLWORK_MASK,r9
	add r8=IA64_PID_LEVEL_OFFSET,r17
	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
	;;
	ld4 r8=[r8]				// r8 = pid->level
	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
	;;
	shl r8=r8,IA64_UPID_SHIFT
	;;
	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
	;;
	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
	;;
	cmp.ne p8,p0=0,r9
	mov r17=-1
	;;
(p6)	st8 [r18]=r32
(p7)	st8 [r18]=r17
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0			// i must not leak kernel bits...
	mov r18=0			// i must not leak kernel bits...
	FSYS_RETURN
END(fsys_set_tid_address)

#if IA64_GTOD_LOCK_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000

/*
 * fsys_gettimeofday: r32 = result pointer, r33 = second argument (only
 * checked for NaT here).
 * Sets the CLOCK_DIVIDE_BY_1000 flag and runs the shared .gettime body, which
 * stores seconds at [r31] and the sub-second part at
 * [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET] (nanoseconds, or nanoseconds / 1000
 * when p14 is set).  Returns r8 = 0 on success; fails with EINVAL for NaT
 * arguments and EFAULT if the user stores fault.
 */
ENTRY(fsys_gettimeofday)
	.prologue
	.altrp b6
	.body
	mov r31 = r32
	tnat.nz p6,p0 = r33		// guard against NaT argument
(p6)    br.cond.spnt.few .fail_einval
	mov r30 = CLOCK_DIVIDE_BY_1000
	;;
.gettime:
	// Register map
	// Incoming r31 = pointer to address where to place result
	//          r30 = flags determining how time is processed
	// r2,r3	= temp r4-r7 preserved
	// r8		= result nanoseconds
	// r9		= result seconds
	// r10		= temporary storage for clock difference
	// r11		= preserved: saved ar.pfs
	// r12		= preserved: memory stack
	// r13		= preserved: thread pointer
	// r14		= address of mask / mask value
	// r15		= preserved: system call number
	// r16		= preserved: current task pointer
	// r17		= (not used)
	// r18		= (not used)
	// r19		= address of itc_lastcycle
	// r20		= struct fsyscall_gtod_data (= address of gtod_lock.sequence)
	// r21		= address of mmio_ptr
	// r22		= address of wall_time or monotonic_time
	// r23		= address of shift / value
	// r24		= address mult factor / cycle_last value
	// r25		= itc_lastcycle value
	// r26		= address clocksource cycle_last
	// r27		= (not used)
	// r28		= sequence number at the beginning of critical section
	// r29		= address of itc_jitter
	// r30		= time processing flags / memory address
	// r31		= pointer to result
	// Predicates
	// p6,p7	short term use
	// p8		= timesource ar.itc
	// p9		= timesource mmio64
	// p10		= timesource mmio32 - not used
	// p11		= timesource not to be handled by asm code
	// p12		= memory time source ( = p9 | p10) - not used
	// p13		= do cmpxchg with itc_lastcycle
	// p14		= Divide by 1000
	// p15		= Add monotonic
	//
	// Note that instructions are optimized for McKinley. McKinley can
	// process two bundles simultaneously and therefore we continuously
	// try to feed the CPU two bundles and then a stop.

	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r31		// guard against NaT argument
(p6)	br.cond.spnt.few .fail_einval
	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
	;;
	ld4 r2 = [r2]			// process work pending flags
	movl r29 = itc_jitter_data	// itc_jitter
	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
	mov pr = r30,0xc000	// Set predicates according to function
	;;
	and r2 = TIF_ALLWORK_MASK,r2
	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
	;;
	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
(p6)	br.cond.spnt.many fsys_fallback_syscall
	;;
	// Begin critical section
.time_redo:
	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
	;;
	and r28 = ~1,r28	// And make sequence even to force retry if odd
	;;
	ld8 r30 = [r21]		// clocksource->mmio_ptr
	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
	ld4 r2 = [r29]		// itc_jitter value
	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
	;;
	ld4 r3 = [r24]		// clocksource mult value
	ld8 r14 = [r14]         // clocksource mask value
	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
	;;
	setf.sig f7 = r3	// Setup for mult scaling of counter
(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
	ld4 r23 = [r23]		// clocksource shift value
	ld8 r24 = [r26]		// get clksrc_cycle_last value
(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
	;;
	.pred.rel.mutex p8,p9
	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
	;;
	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
	;;
(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
	sub r10 = r2,r24	// current_cycle - last_cycle
	;;
(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
	;;
(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
	;;
(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
	;;
(p7)	sub r10 = r3,r24	//	then use new last_cycle instead
	;;
	and r10 = r10,r14	// Apply mask
	;;
	setf.sig f8 = r10
	nop.i 123
	;;
	// fault check takes 5 cycles and we have spare time
EX(.fail_efault, probe.w.fault r31, 3)
	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
	;;
	getf.sig r2 = f8
	mf
	;;
	ld4 r10 = [r20]		// gtod_lock.sequence
	shr.u r2 = r2,r23	// shift by factor
	;;
	add r8 = r8,r2		// Add xtime.nsecs
	cmp4.ne p7,p0 = r28,r10
(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
	// End critical section.
	// Now r8=tv->tv_nsec and r9=tv->tv_sec
	mov r10 = r0
	movl r2 = 1000000000
	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
	;;
.time_normalize:
	mov r21 = r8
	cmp.ge p6,p0 = r8,r2
(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
	;;
(p14)	setf.sig f8 = r20
(p6)	sub r8 = r8,r2
(p6)	add r9 = 1,r9		// two nops before the branch.
(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
(p6)	br.cond.dpnt.few .time_normalize
	;;
	// Divided by 8 through shift. Now divide by 125
	// The compiler was able to do that with a multiply
	// and a shift and we do the same
EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
	;;
(p14)	getf.sig r2 = f8
	;;
	mov r8 = r0
(p14)	shr.u r21 = r2, 4
	;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
	FSYS_RETURN
.fail_einval:
	mov r8 = EINVAL
	mov r10 = -1
	FSYS_RETURN
.fail_efault:
	mov r8 = EFAULT
	mov r10 = -1
	FSYS_RETURN
END(fsys_gettimeofday)

/*
 * fsys_clock_gettime: r32 = clock id, r33 = result pointer.
 * Only CLOCK_REALTIME (0) and CLOCK_MONOTONIC (1) are handled; anything else
 * goes to the heavy-weight syscall.  The clock id is shifted left by 15 so
 * that CLOCK_MONOTONIC becomes CLOCK_ADD_MONOTONIC (0x8000) in r30, then the
 * shared .gettime body in fsys_gettimeofday does the work.
 */
ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
	mov r31 = r33
	shl r30 = r32,15
	br.many .gettime
END(fsys_clock_gettime)

/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
	mov r8=EINVAL			// default to EINVAL

#ifdef CONFIG_SMP
	// __ticket_spin_trylock(r31)
	ld4 r17=[r31]
	;;
	mov.m ar.ccv=r17
	extr.u r9=r17,17,15
	adds r19=1,r17
	extr.u r18=r17,0,15
	;;
	cmp.eq p6,p7=r9,r18
	;;
(p6)	cmpxchg4.acq r9=[r31],r19,ar.ccv
(p6)	dep.z r20=r19,1,15		// next serving ticket for unlock
(p7)	br.cond.spnt.many .lock_contention
	;;
	cmp4.eq p0,p7=r9,r17
	adds r31=2,r31
(p7)	br.cond.spnt.many .lock_contention
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	st2.rel [r31]=r20
	mov r20=0					// i must not leak kernel bits...
#endif
	SSM_PSR_I(p0, p9, r31)
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0				// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	st2.rel [r31]=r20			// release the lock
#endif
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)

/*
 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * r32 = user pointer receiving the cpu number (4-byte store),
 * r33 = user pointer receiving the node number (2-byte store).
 * Returns r8 = 0 on success, EINVAL for NaT arguments, EFAULT if either
 * pointer is not writable; falls back to the heavy-weight syscall if any
 * TIF_ALLWORK_MASK flag is set.
 *
 * NOTE(review): only 16 bits are written through r33.  If the user-visible
 * node argument is a 32-bit object, its upper half is left untouched by this
 * handler -- confirm against the C implementation of getcpu.
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)    br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)    br.cond.spnt.few .fail_einval		// B
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
	shladd r18=r3,1,r17
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	;;
EX(.fail_efault, st4 [r32] = r3)
EX(.fail_efault, st2 [r33] = r20)
	mov r8=0
	;;
#else
EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
EX(.fail_efault, st4 [r32] = r3)
EX(.fail_efault, st2 [r33] = r0)
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)

/*
 * fsys_fallback_syscall: common exit taken by the light-weight handlers when
 * they cannot complete the call themselves.  Loads the heavy-weight entry
 * point from sys_call_table[r15 - 1024] into r18, masks interrupts, captures
 * psr/ar.rsc/ar.fpsr/ar.pfs in r29/r27/r21/r26 and falls through into
 * paravirt_fsys_bubble_down.
 */
ENTRY(fsys_fallback_syscall)
	.prologue
	.altrp b6
	.body
	/*
	 * We only get here from light-weight syscall handlers.  Thus, we already
	 * know that r15 contains a valid syscall number.  No need to re-check.
	 */
	adds r17=-1024,r15
	movl r14=sys_call_table
	;;
	RSM_PSR_I(p0, r26, r27)
	shladd r18=r17,3,r14
	;;
	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
	mov r27=ar.rsc
	mov r21=ar.fpsr
	mov r26=ar.pfs
END(fsys_fallback_syscall)
	/* FALL THROUGH */
GLOBAL_ENTRY(paravirt_fsys_bubble_down)
	.prologue
	.altrp b6
	.body
	/*
	 * We get here for syscalls that don't have a lightweight
	 * handler.  For those, we need to bubble down into the kernel
	 * and that requires setting up a minimal pt_regs structure,
	 * and initializing the CPU state more or less as if an
	 * interruption had occurred.  To make syscall-restarts work,
	 * we setup pt_regs such that cr_iip points to the second
	 * instruction in syscall_via_break.  Decrementing the IP
	 * hence will restart the syscall via break and not
	 * decrementing IP will return us to the caller, as usual.
	 * Note that we preserve the value of psr.pp rather than
	 * initializing it from dcr.pp.  This makes it possible to
	 * distinguish fsyscall execution from other privileged
	 * execution.
	 *
	 * On entry:
	 *	- normal fsyscall handler register usage, except
	 *	  that we also have:
	 *	- r18: address of syscall entry point
	 *	- r21: ar.fpsr
	 *	- r26: ar.pfs
	 *	- r27: ar.rsc
	 *	- r29: psr
	 *
	 * We used to clear some PSR bits here but that requires slow
	 * serialization.  Fortunately, that isn't really necessary.
	 * The rationale is as follows: we used to clear bits
	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
	 * However,
	 *
	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
	 *	    invoked
	 * PSR.DFL: always 0 (kernel never turns it on)
	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
	 *	    initiative
	 * PSR.DI : always 0 (kernel never turns it on)
	 * PSR.SI : always 0 (kernel never turns it on)
	 * PSR.DB : don't care --- kernel never enables kernel-level
	 *	    breakpoints
	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
	 *          will trigger a taken branch; the taken-trap-handler then
	 *          converts the syscall into a break-based system-call.
	 */
	/*
	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
	 * The rest we have to synthesize.
	 */
#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
					 | (0x1 << IA64_PSR_RI_BIT)	\
					 | IA64_PSR_BN | IA64_PSR_I)

	invala					// M0|1
	movl r14=ia64_ret_from_syscall		// X

	nop.m 0
	movl r28=__kernel_syscall_via_break	// X	create cr.iip
	;;

	mov r2=r16				// A    get task addr to addl-addressable register
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
	mov r31=pr				// I0   save pr (2 cyc)
	;;
	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
	;;
	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
	nop.i 0
	;;
	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
#else
	nop.m 0
#endif
	nop.i 0
	;;
	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
	nop.i 0
	;;
	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
	movl r8=PSR_ONE_BITS			// X
	;;
	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
	mov r19=b6				// I0   save b6 (2 cyc)
	mov r20=r1				// A    save caller's gp in r20
	;;
	or r29=r8,r29				// A    construct cr.ipsr value to save
	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack

	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
	br.call.sptk.many b7=ia64_syscall_setup	// B
	;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
	// mov.m r30=ar.itc is called in advance
	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
	;;
	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
	;;
	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
	ld8 r21=[r17]				// cumulated utime
	sub r22=r19,r18				// stime before leave kernel
	;;
	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
	sub r18=r30,r19				// elapsed time in user mode
	;;
	add r20=r20,r22				// sum stime
	add r21=r21,r18				// sum utime
	;;
	st8 [r16]=r20				// update stime
	st8 [r17]=r21				// update utime
	;;
#endif
	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
	mov rp=r14				// I0   set the real return addr
	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
	;;
	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
	cmp.eq p8,p0=r3,r0			// A
(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT

	nop.m 0
(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
	br.cond.spnt ia64_trace_syscall		// B
END(paravirt_fsys_bubble_down)

	/*
	 * Table of light-weight handlers.  The first labelled entry is the
	 * slot for syscall number 1024; a zero entry means "no light-weight
	 * handler".  The quadword just before the label holds the address of
	 * paravirt_fsys_bubble_down.
	 */
	.rodata
	.align 8
	.globl paravirt_fsyscall_table

	data8 paravirt_fsys_bubble_down
paravirt_fsyscall_table:
	data8 fsys_ni_syscall
	data8 0				// exit			// 1025
	data8 0				// read
	data8 0				// write
	data8 0				// open
	data8 0				// close
	data8 0				// creat		// 1030
	data8 0				// link
	data8 0				// unlink
	data8 0				// execve
	data8 0				// chdir
	data8 0				// fchdir		// 1035
	data8 0				// utimes
	data8 0				// mknod
	data8 0				// chmod
	data8 0				// chown
	data8 0				// lseek		// 1040
	data8 fsys_getpid		// getpid
	data8 fsys_getppid		// getppid
	data8 0				// mount
	data8 0				// umount
	data8 0				// setuid		// 1045
	data8 0				// getuid
	data8 0				// geteuid
	data8 0				// ptrace
	data8 0				// access
	data8 0				// sync			// 1050
	data8 0				// fsync
	data8 0				// fdatasync
	data8 0				// kill
	data8 0				// rename
	data8 0				// mkdir		// 1055
	data8 0				// rmdir
	data8 0				// dup
	data8 0				// pipe
	data8 0				// times
	data8 0				// brk			// 1060
	data8 0				// setgid
	data8 0				// getgid
	data8 0				// getegid
	data8 0				// acct
	data8 0				// ioctl		// 1065
	data8 0				// fcntl
	data8 0				// umask
	data8 0				// chroot
	data8 0				// ustat
	data8 0				// dup2			// 1070
	data8 0				// setreuid
	data8 0				// setregid
	data8 0				// getresuid
	data8 0				// setresuid
	data8 0				// getresgid		// 1075
	data8 0				// setresgid
	data8 0				// getgroups
	data8 0				// setgroups
	data8 0				// getpgid
	data8 0				// setpgid		// 1080
	data8 0				// setsid
	data8 0				// getsid
	data8 0				// sethostname
	data8 0				// setrlimit
	data8 0				// getrlimit		// 1085
	data8 0				// getrusage
	data8 fsys_gettimeofday		// gettimeofday
	data8 0				// settimeofday
	data8 0				// select
	data8 0				// poll			// 1090
	data8 0				// symlink
	data8 0				// readlink
	data8 0				// uselib
	data8 0				// swapon
	data8 0				// swapoff		// 1095
	data8 0				// reboot
	data8 0				// truncate
	data8 0				// ftruncate
	data8 0				// fchmod
	data8 0				// fchown		// 1100
	data8 0				// getpriority
	data8 0				// setpriority
	data8 0				// statfs
	data8 0				// fstatfs
	data8 0				// gettid		// 1105
	data8 0				// semget
	data8 0				// semop
	data8 0				// semctl
	data8 0				// msgget
	data8 0				// msgsnd		// 1110
	data8 0				// msgrcv
	data8 0				// msgctl
	data8 0				// shmget
	data8 0				// shmat
	data8 0				// shmdt		// 1115
	data8 0				// shmctl
	data8 0				// syslog
	data8 0				// setitimer
	data8 0				// getitimer
	data8 0					 		// 1120
	data8 0
	data8 0
	data8 0				// vhangup
	data8 0				// lchown
	data8 0				// remap_file_pages	// 1125
	data8 0				// wait4
	data8 0				// sysinfo
	data8 0				// clone
	data8 0				// setdomainname
	data8 0				// newuname		// 1130
	data8 0				// adjtimex
	data8 0
	data8 0				// init_module
	data8 0				// delete_module
	data8 0							// 1135
	data8 0
	data8 0				// quotactl
	data8 0				// bdflush
	data8 0				// sysfs
	data8 0				// personality		// 1140
	data8 0				// afs_syscall
	data8 0				// setfsuid
	data8 0				// setfsgid
	data8 0				// getdents
	data8 0				// flock		// 1145
	data8 0				// readv
	data8 0				// writev
	data8 0				// pread64
	data8 0				// pwrite64
	data8 0				// sysctl		// 1150
	data8 0				// mmap
	data8 0				// munmap
	data8 0				// mlock
	data8 0				// mlockall
	data8 0				// mprotect		// 1155
	data8 0				// mremap
	data8 0				// msync
	data8 0				// munlock
	data8 0				// munlockall
	data8 0				// sched_getparam	// 1160
	data8 0				// sched_setparam
	data8 0				// sched_getscheduler
	data8 0				// sched_setscheduler
	data8 0				// sched_yield
	data8 0				// sched_get_priority_max	// 1165
	data8 0				// sched_get_priority_min
	data8 0				// sched_rr_get_interval
	data8 0				// nanosleep
	data8 0				// nfsservctl
	data8 0				// prctl		// 1170
	data8 0				// getpagesize
	data8 0				// mmap2
	data8 0				// pciconfig_read
	data8 0				// pciconfig_write
	data8 0				// perfmonctl		// 1175
	data8 0				// sigaltstack
	data8 0				// rt_sigaction
	data8 0				// rt_sigpending
	data8 fsys_rt_sigprocmask	// rt_sigprocmask
	data8 0				// rt_sigqueueinfo	// 1180
	data8 0				// rt_sigreturn
	data8 0				// rt_sigsuspend
	data8 0				// rt_sigtimedwait
	data8 0				// getcwd
	data8 0				// capget		// 1185
	data8 0				// capset
	data8 0				// sendfile
	data8 0
	data8 0
	data8 0				// socket		// 1190
	data8 0				// bind
	data8 0				// connect
	data8 0				// listen
	data8 0				// accept
	data8 0				// getsockname		// 1195
	data8 0				// getpeername
	data8 0				// socketpair
	data8 0				// send
	data8 0				// sendto
	data8 0				// recv			// 1200
	data8 0				// recvfrom
	data8 0				// shutdown
	data8 0				// setsockopt
	data8 0				// getsockopt
	data8 0				// sendmsg		// 1205
	data8 0				// recvmsg
	data8 0				// pivot_root
	data8 0				// mincore
	data8 0				// madvise
	data8 0				// newstat		// 1210
	data8 0				// newlstat
	data8 0				// newfstat
	data8 0				// clone2
	data8 0				// getdents64
	data8 0				// getunwind		// 1215
	data8 0				// readahead
	data8 0				// setxattr
	data8 0				// lsetxattr
	data8 0				// fsetxattr
	data8 0				// getxattr		// 1220
	data8 0				// lgetxattr
	data8 0				// fgetxattr
	data8 0				// listxattr
	data8 0				// llistxattr
	data8 0				// flistxattr		// 1225
	data8 0				// removexattr
	data8 0				// lremovexattr
	data8 0				// fremovexattr
	data8 0				// tkill
	data8 0				// futex		// 1230
	data8 0				// sched_setaffinity
	data8 0				// sched_getaffinity
	data8 fsys_set_tid_address	// set_tid_address
	data8 0				// fadvise64_64
	data8 0				// tgkill		// 1235
	data8 0				// exit_group
	data8 0				// lookup_dcookie
	data8 0				// io_setup
	data8 0				// io_destroy
	data8 0				// io_getevents		// 1240
	data8 0				// io_submit
	data8 0				// io_cancel
	data8 0				// epoll_create
	data8 0				// epoll_ctl
	data8 0				// epoll_wait		// 1245
	data8 0				// restart_syscall
	data8 0				// semtimedop
	data8 0				// timer_create
	data8 0				// timer_settime
	data8 0				// timer_gettime 	// 1250
	data8 0				// timer_getoverrun
	data8 0				// timer_delete
	data8 0				// clock_settime
	data8 fsys_clock_gettime	// clock_gettime
	data8 0				// clock_getres		// 1255
	data8 0				// clock_nanosleep
	data8 0				// fstatfs64
	data8 0				// statfs64
	data8 0				// mbind
	data8 0				// get_mempolicy	// 1260
	data8 0				// set_mempolicy
	data8 0				// mq_open
	data8 0				// mq_unlink
	data8 0				// mq_timedsend
	data8 0				// mq_timedreceive	// 1265
	data8 0				// mq_notify
	data8 0				// mq_getsetattr
	data8 0				// kexec_load
	data8 0				// vserver
	data8 0				// waitid		// 1270
	data8 0				// add_key
	data8 0				// request_key
	data8 0				// keyctl
	data8 0				// ioprio_set
	data8 0				// ioprio_get		// 1275
	data8 0				// move_pages
	data8 0				// inotify_init
	data8 0				// inotify_add_watch
	data8 0				// inotify_rm_watch
	data8 0				// migrate_pages	// 1280
	data8 0				// openat
	data8 0				// mkdirat
	data8 0				// mknodat
	data8 0				// fchownat
	data8 0				// futimesat		// 1285
	data8 0				// newfstatat
	data8 0				// unlinkat
	data8 0				// renameat
	data8 0				// linkat
	data8 0				// symlinkat		// 1290
	data8 0				// readlinkat
	data8 0				// fchmodat
	data8 0				// faccessat
	data8 0
	data8 0							// 1295
	data8 0				// unshare
	data8 0				// splice
	data8 0				// set_robust_list
	data8 0				// get_robust_list
	data8 0				// sync_file_range	// 1300
	data8 0				// tee
	data8 0				// vmsplice
	data8 0
	data8 fsys_getcpu		// getcpu		// 1304

	// fill in zeros for the remaining entries
	.zero:
	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0