diff options
Diffstat (limited to 'arch/ia64/kernel')
36 files changed, 2051 insertions, 934 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 33e5a598672d..13fd10e8699e 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ - salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ + salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ unwind.o mca.o mca_asm.o topology.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 78f28d825f30..43687cc60dfb 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -117,7 +117,10 @@ acpi_get_sysname(void) if (!strcmp(hdr->oem_id, "HP")) { return "hpzx1"; } else if (!strcmp(hdr->oem_id, "SGI")) { - return "sn2"; + if (!strcmp(hdr->oem_table_id + 4, "UV")) + return "uv"; + else + return "sn2"; } return "dig"; @@ -130,6 +133,8 @@ acpi_get_sysname(void) return "hpzx1_swiotlb"; # elif defined (CONFIG_IA64_SGI_SN2) return "sn2"; +# elif defined (CONFIG_IA64_SGI_UV) + return "uv"; # elif defined (CONFIG_IA64_DIG) return "dig"; # else @@ -423,6 +428,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) static struct acpi_table_slit __initdata *slit_table; +cpumask_t early_cpu_possible_map = CPU_MASK_NONE; static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) { @@ -459,7 +465,6 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", len, slit->header.length); - memset(numa_slit, 10, sizeof(numa_slit)); return; } slit_table = slit; @@ -482,6 +487,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) (pa->apic_id << 8) | (pa->local_sapic_eid); /* nid should be overridden as logical node id later */ node_cpuid[srat_num_cpus].nid = pxm; + cpu_set(srat_num_cpus, early_cpu_possible_map); srat_num_cpus++; } @@ -559,7 +565,7 @@ void __init acpi_numa_arch_fixup(void) } /* set logical node id in cpu structure */ - for (i = 0; i < srat_num_cpus; i++) + for_each_possible_early_cpu(i) node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); printk(KERN_INFO "Number of logical nodes in system = %d\n", @@ -567,8 +573,14 @@ void __init acpi_numa_arch_fixup(void) printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); - if (!slit_table) + if (!slit_table) { + for (i = 0; i < MAX_NUMNODES; i++) + for (j = 0; j < MAX_NUMNODES; j++) + node_distance(i, j) = i == j ? LOCAL_DISTANCE : + REMOTE_DISTANCE; return; + } + memset(numa_slit, -1, sizeof(numa_slit)); for (i = 0; i < slit_table->locality_count; i++) { if (!pxm_bit_test(i)) @@ -620,6 +632,9 @@ void acpi_unregister_gsi(u32 gsi) if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) return; + if (has_8259 && gsi < 16) + return; + iosapic_unregister_intr(gsi); } @@ -964,7 +979,7 @@ acpi_map_iosapics (void) fs_initcall(acpi_map_iosapics); #endif /* CONFIG_ACPI_NUMA */ -int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { int err; diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 0aebc6f79e95..c64a55af9b95 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -7,8 +7,9 @@ #define ASM_OFFSETS_C 1 #include <linux/sched.h> +#include <linux/pid.h> #include <linux/clocksource.h> - +#include <linux/kbuild.h> #include <asm-ia64/processor.h> #include <asm-ia64/ptrace.h> #include <asm-ia64/siginfo.h> @@ -18,11 +19,6 @@ #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - void foo(void) { DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); @@ -34,17 +30,29 @@ void foo(void) DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); + BUILD_BUG_ON(sizeof(struct upid) != 32); + DEFINE(IA64_UPID_SHIFT, 5); + BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); + DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); + DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); + DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime)); +#endif BLANK(); DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid)); + DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); + DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index fbe742ad2fde..f065093f8e9b 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -24,6 +24,7 @@ int kdump_status[NR_CPUS]; static atomic_t kdump_cpu_frozen; atomic_t kdump_in_progress; static int kdump_on_init = 1; +static int kdump_on_fatal_mca = 1; static inline Elf64_Word *append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, @@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt) static void machine_kdump_on_init(void) { + crash_save_vmcoreinfo(); local_irq_disable(); kexec_disable_iosapic(); machine_kexec(ia64_kimage); @@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) struct ia64_mca_notify_die *nd; struct die_args *args = data; - if (!kdump_on_init) + if (!kdump_on_init && !kdump_on_fatal_mca) return NOTIFY_DONE; if (!ia64_kimage) { @@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) return NOTIFY_DONE; switch (val) { - case DIE_INIT_MONARCH_PROCESS: + case DIE_INIT_MONARCH_PROCESS: + if (kdump_on_init) { atomic_set(&kdump_in_progress, 1); *(nd->monarch_cpu) = -1; - break; - case DIE_INIT_MONARCH_LEAVE: + } + break; + case DIE_INIT_MONARCH_LEAVE: + if (kdump_on_init) machine_kdump_on_init(); - break; - case DIE_INIT_SLAVE_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); - break; - case DIE_MCA_RENDZVOUS_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); - break; - case DIE_MCA_MONARCH_LEAVE: - /* die_register->signr indicate if MCA is recoverable */ - if (!args->signr) - machine_kdump_on_init(); - break; + break; + case DIE_INIT_SLAVE_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_RENDZVOUS_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_MONARCH_LEAVE: + /* *(nd->data) indicate if MCA is recoverable */ + if (kdump_on_fatal_mca && !(*(nd->data))) { + atomic_set(&kdump_in_progress, 1); + *(nd->monarch_cpu) = -1; + machine_kdump_on_init(); + } + break; } return NOTIFY_DONE; } #ifdef CONFIG_SYSCTL -static ctl_table kdump_on_init_table[] = { +static ctl_table kdump_ctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "kdump_on_init", @@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kdump_on_fatal_mca", + .data = &kdump_on_fatal_mca, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; @@ -215,7 +231,7 @@ static ctl_table sys_table[] = { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, - .child = kdump_on_init_table, + .child = kdump_ctl_table, }, { .ctl_name = 0 } }; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 728d7247a1a6..d45f215bc8fc 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -37,6 +37,7 @@ #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mca.h> +#include <asm/tlbflush.h> #define EFI_DEBUG 0 @@ -403,6 +404,41 @@ efi_get_pal_addr (void) return NULL; } + +static u8 __init palo_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + + return sum; +} + +/* + * Parse and handle PALO table which is published at: + * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf + */ +static void __init handle_palo(unsigned long palo_phys) +{ + struct palo_table *palo = __va(palo_phys); + u8 checksum; + + if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) { + printk(KERN_INFO "PALO signature incorrect.\n"); + return; + } + + checksum = palo_checksum((u8 *)palo, palo->length); + if (checksum) { + printk(KERN_INFO "PALO checksum incorrect.\n"); + return; + } + + setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO); +} + void efi_map_pal_code (void) { @@ -432,6 +468,7 @@ efi_init (void) u64 efi_desc_size; char *cp, vendor[100] = "unknown"; int i; + unsigned long palo_phys; /* * It's too early to be able to use the standard kernel command line @@ -496,6 +533,8 @@ efi_init (void) efi.hcdp = EFI_INVALID_TABLE_ADDR; efi.uga = EFI_INVALID_TABLE_ADDR; + palo_phys = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < (int) efi.systab->nr_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { efi.mps = config_tables[i].table; @@ -515,10 +554,17 @@ efi_init (void) } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { efi.hcdp = config_tables[i].table; printk(" HCDP=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, + PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) { + palo_phys = config_tables[i].table; + printk(" PALO=0x%lx", config_tables[i].table); } } printk("\n"); + if (palo_phys != EFI_INVALID_TABLE_ADDR) + handle_palo(palo_phys); + runtime = __va(efi.systab->runtime); efi.get_time = phys_get_time; efi.set_time = phys_set_time; diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3c331c464b40..ca2bb95726de 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -570,6 +570,7 @@ GLOBAL_ENTRY(ia64_trace_syscall) br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value .ret3: (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i // disable interrupts br.cond.sptk .work_pending_syscall_end strace_error: @@ -710,6 +711,16 @@ ENTRY(ia64_leave_syscall) (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif .work_processed_syscall: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + adds r2=PT(LOADRS)+16,r12 +(pUStk) mov.m r22=ar.itc // fetch time at leave + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + adds r3=PT(AR_BSPSTORE)+16,r12 // deferred + ;; +#else adds r2=PT(LOADRS)+16,r12 adds r3=PT(AR_BSPSTORE)+16,r12 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -718,6 +729,7 @@ ENTRY(ia64_leave_syscall) ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" nop.i 0 ;; +#endif mov r16=ar.bsp // M2 get existing backing store pointer ld8 r18=[r2],PT(R9)-PT(B6) // load b6 (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? @@ -737,12 +749,21 @@ ENTRY(ia64_leave_syscall) ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r28=[r3],16 // M0|1 load cr.iip +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#else mov r22=r0 // A clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; +#endif ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled nop 0 @@ -759,7 +780,11 @@ ENTRY(ia64_leave_syscall) ld8.fill r1=[r3],16 // M0|1 load r1 (pUStk) mov r17=1 // A ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) st1 [r15]=r17 // M2|3 +#else (pUStk) st1 [r14]=r17 // M2|3 +#endif ld8.fill r13=[r3],16 // M0|1 mov f8=f0 // F clear f8 ;; @@ -775,12 +800,22 @@ ENTRY(ia64_leave_syscall) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition cover // B add current frame into dirty partition & set cr.ifs ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov r19=ar.bsp // M2 get new backing store pointer + st8 [r14]=r22 // M save time at leave + mov f10=f0 // F clear f10 + + mov r22=r0 // A clear r22 + movl r14=__kernel_syscall_via_epc // X + ;; +#else mov r19=ar.bsp // M2 get new backing store pointer mov f10=f0 // F clear f10 nop.m 0 movl r14=__kernel_syscall_via_epc // X ;; +#endif mov.m ar.csd=r0 // M2 clear ar.csd mov.m ar.ccv=r0 // M2 clear ar.ccv mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) @@ -913,10 +948,18 @@ GLOBAL_ENTRY(ia64_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + .pred.rel.mutex pUStk,pKStk +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled +(pUStk) mov.m r22=ar.itc // M fetch time at leave + nop.i 0 + ;; +#else (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled nop.i 0 nop.i 0 ;; +#endif ld8 r29=[r16],16 // load cr.ipsr ld8 r28=[r17],16 // load cr.iip ;; @@ -938,15 +981,37 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 +#else (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 +#endif ;; ld8 r20=[r16],16 // ar.fpsr ld8.fill r15=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred +#endif ;; ld8.fill r14=[r16],16 ld8.fill r2=[r17] (pUStk) mov r17=1 ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; + // mib : mov add br -> mib : ld8 add br + // bbb_ : br nop cover;; mbb_ : mov br cover;; + // + // no one require bsp in r16 if (pKStk) branch is selected. +(pUStk) st8 [r3]=r22 // save time at leave +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + ld8.fill r3=[r16] // deferred + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch + mov r16=ar.bsp // get existing backing store pointer +#else ld8.fill r3=[r16] (pUStk) st1 [r18]=r17 // restore current->thread.on_ustack shr.u r18=r19,16 // get byte size of existing "dirty" partition @@ -954,6 +1019,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov r16=ar.bsp // get existing backing store pointer LOAD_PHYS_STACK_REG_SIZE(r17) (pKStk) br.cond.dpnt skip_rbs_switch +#endif /* * Restore user backing store. @@ -1090,6 +1156,9 @@ skip_rbs_switch: * r31 = current->thread_info->flags * On exit: * p6 = TRUE if work-pending-check needs to be redone + * + * Interrupts are disabled on entry, reenabled depend on work, and + * disabled on exit. */ .work_pending_syscall: add r2=-8,r2 @@ -1098,16 +1167,16 @@ skip_rbs_switch: st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? + tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed? (p6) br.cond.sptk.few .notify #ifdef CONFIG_PREEMPT (pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 ;; (pKStk) st4 [r20]=r21 - ssm psr.i // enable interrupts #endif + ssm psr.i // enable interrupts br.call.spnt.many rp=schedule -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 +.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) rsm psr.i // disable interrupts ;; #ifdef CONFIG_PREEMPT @@ -1116,13 +1185,13 @@ skip_rbs_switch: (pKStk) st4 [r20]=r0 // preempt_count() <- 0 #endif (pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check + br.cond.sptk.many .work_processed_kernel .notify: (pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 +.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) (pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // don't re-check + br.cond.sptk.many .work_processed_kernel .work_pending_syscall_end: adds r2=PT(R8)+16,r12 @@ -1130,7 +1199,7 @@ skip_rbs_switch: ;; ld8 r8=[r2] ld8 r10=[r3] - br.cond.sptk.many .work_processed_syscall // re-check + br.cond.sptk.many .work_processed_syscall END(ia64_leave_kernel) @@ -1168,9 +1237,12 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail) END(ia64_invoke_schedule_tail) /* - * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to - * be set up by the caller. We declare 8 input registers so the system call - * args get preserved, in case we need to restart a system call. + * Setup stack and call do_notify_resume_user(), keeping interrupts + * disabled. + * + * Note that pSys and pNonSys need to be set up by the caller. + * We declare 8 input registers so the system call args get preserved, + * in case we need to restart a system call. */ ENTRY(notify_resume_user) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 44841971f077..c1625c7e1779 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -61,13 +61,29 @@ ENTRY(fsys_getpid) .prologue .altrp b6 .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; ld4 r9=[r9] - add r8=IA64_TASK_TGID_OFFSET,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r17 ;; and r9=TIF_ALLWORK_MASK,r9 - ld4 r8=[r8] // r8 = current->tgid + ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid + ;; + add r8=IA64_PID_LEVEL_OFFSET,r17 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + mov r17=0 ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall @@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address) .altrp b6 .body add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r16 ;; ld4 r9=[r9] tnat.z p6,p7=r32 // check argument register for being NaT + ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid ;; and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_TASK_PID_OFFSET,r16 + add r8=IA64_PID_LEVEL_OFFSET,r17 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 ;; - ld4 r8=[r8] + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; cmp.ne p8,p0=0,r9 mov r17=-1 ;; @@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday) // Note that instructions are optimized for McKinley. McKinley can // process two bundles simultaneously and therefore we continuously // try to feed the CPU two bundles and then a stop. - // - // Additional note that code has changed a lot. Optimization is TBD. - // Comments begin with "?" are maybe outdated. - tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle - mov pr = r30,0xc000 // Set predicates according to function + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r31 // guard against Nat argument +(p6) br.cond.spnt.few .fail_einval movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address ;; + ld4 r2 = [r2] // process work pending flags movl r29 = itc_jitter_data // itc_jitter add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - ld4 r2 = [r2] // process work pending flags - ;; -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 + mov pr = r30,0xc000 // Set predicates according to function + ;; and r2 = TIF_ALLWORK_MASK,r2 -(p6) br.cond.spnt.few .fail_einval // ? deferred branch + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall +(p6) br.cond.spnt.many fsys_fallback_syscall ;; // Begin critical section .time_redo: @@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday) (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value - ;; // ? could be removed by moving the last add upward ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec ;; ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec @@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday) EX(.fail_efault, probe.w.fault r31, 3) xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) ;; - // ? simulate tbit.nz.or p7,p0 = r28,0 getf.sig r2 = f8 mf ;; ld4 r10 = [r20] // gtod_lock.sequence shr.u r2 = r2,r23 // shift by factor - ;; // ? overloaded 3 bundles! + ;; add r8 = r8,r2 // Add xtime.nsecs cmp4.ne p7,p0 = r28,r10 (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo @@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3) EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it ;; - mov r8 = r0 (p14) getf.sig r2 = f8 ;; + mov r8 = r0 (p14) shr.u r21 = r2, 4 ;; EX(.fail_efault, st8 [r31] = r9) @@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov.m r30=ar.itc // M get cycle for accounting +#else nop.m 0 +#endif nop.i 0 ;; mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore @@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mov.m r30=ar.itc is called in advance + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r30,r19 // elapsed time in user mode + ;; + add r20=r20,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r20 // update stime + st8 [r17]=r21 // update utime + ;; +#endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 mov rp=r14 // I0 set the real return addr and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index d3a41d5f8d12..ddeab4e36fd5 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock) br.ret.sptk.many rp END(sched_clock) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +GLOBAL_ENTRY(cycle_to_cputime) + alloc r16=ar.pfs,1,0,0,0 + addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r32 + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(cycle_to_cputime) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + GLOBAL_ENTRY(start_kernel_thread) .prologue .save rp, r0 // this is the end of the call-chain diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 8e7193d55528..6da1f20d7372 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page); EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ EXPORT_SYMBOL(csum_ipv6_magic); -#include <asm/semaphore.h> -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); -EXPORT_SYMBOL(__down_trylock); -EXPORT_SYMBOL(__up); - #include <asm/page.h> EXPORT_SYMBOL(clear_page); diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index bc8efcad28b8..9d7e1c66faf4 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -18,7 +18,6 @@ #include <asm/pgtable.h> static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 082c31dcfd99..39752cdef6ff 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void) if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES); - if (!rte) - return NULL; for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) list_add(&rte->rte_list, &free_rte_list); } diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 6dee579f205f..7fd18f54c056 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -183,10 +183,10 @@ void fixup_irqs(void) { unsigned int irq; extern void ia64_process_pending_intr(void); - extern void ia64_disable_timer(void); extern volatile int time_keeper_id; - ia64_disable_timer(); + /* Mask ITV to disable timer */ + ia64_set_itv(1 << 16); /* * Find a new timesync master diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index d8be23fbe6bc..5538471e8d68 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) static unsigned char count; static long last_time; - if (jiffies - last_time > 5*HZ) + if (time_after(jiffies, last_time + 5 * HZ)) count = 0; if (++count < 5) { last_time = jiffies; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 34f44d8be00d..80b44ea052d7 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -805,8 +805,13 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + ;; + mov b6=r30 // I0 setup syscall handler branch reg early +#else nop.i 0 ;; +#endif mov.m r25=ar.unat // M2 (5 cyc) dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr @@ -817,7 +822,11 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov.m r30=ar.itc // M get cycle for accounting +#else mov b6=r30 // I0 setup syscall handler branch reg early +#endif cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit @@ -829,6 +838,30 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mov.m r30=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A +(pKStk) br.cond.spnt .skip_accounting // B unlikely skip + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime + ld8 r21=[r17] // M cumulated utime + sub r22=r19,r18 // A stime before leave + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp + sub r18=r30,r19 // A elapsed time in user + ;; + add r20=r20,r22 // A sum stime + add r21=r21,r18 // A sum utime + ;; + st8 [r16]=r20 // M update stime + st8 [r17]=r21 // M update utime + ;; +.skip_accounting: +#endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 nop 0 bsw.1 // B (6 cyc) regs are saved, switch to bank 1 @@ -928,6 +961,7 @@ END(interrupt) * - r27: saved ar.rsc * - r28: saved cr.iip * - r29: saved cr.ipsr + * - r30: ar.itc for accounting (don't touch) * - r31: saved pr * - b0: original contents (to be saved) * On exit: @@ -1042,53 +1076,46 @@ END(ia64_syscall_setup) DBG_FAULT(15) FAULT(15) + .org ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + DBG_FAULT(16) + FAULT(16) + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING /* - * Squatting in this space ... + * There is no particular reason for this code to be here, other than + * that there happens to be space here that would go unused otherwise. + * If this fault ever gets "unreserved", simply moved the following + * code to a more suitable spot... * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. + * account_sys_enter is called from SAVE_MIN* macros if accounting is + * enabled and if the macro is entered from user mode. */ -ENTRY(dispatch_illegal_op_fault) - .prologue - .body - SAVE_MIN_WITH_COVER - ssm psr.ic | PSR_DEFAULT_BITS +ENTRY(account_sys_enter) + // mov.m r20=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 ;; - srlz.i // guarantee that interruption collection is on + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel + ;; + ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST + st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r20,r19 // elapsed time in user mode ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec + add r23=r23,r22 // sum stime + add r21=r21,r18 // sum utime ;; - SAVE_REST - PT_REGS_UNWIND_INFO(0) + st8 [r16]=r23 // update stime + st8 [r17]=r21 // update utime ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) + br.ret.sptk.many rp +END(account_sys_enter) +#endif .org ia64_ivt+0x4400 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1646,6 +1673,48 @@ END(ia32_interrupt) DBG_FAULT(67) FAULT(67) + /* + * Squatting in this space ... + * + * This special case dispatcher for illegal operation faults allows preserved + * registers to be modified through a callback function (asm only) that is handed + * back from the fault handler in r8. Up to three arguments can be passed to the + * callback function by returning an aggregate with the callback as its first + * element, followed by the arguments. + */ +ENTRY(dispatch_illegal_op_fault) + .prologue + .body + SAVE_MIN_WITH_COVER + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in insn group + mov out0=ar.ec + ;; + SAVE_REST + PT_REGS_UNWIND_INFO(0) + ;; + br.call.sptk.many rp=ia64_illegal_op_fault +.ret0: ;; + alloc r14=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r9 + mov out1=r10 + mov out2=r11 + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + mov b6=r8 + ;; + cmp.ne p6,p0=0,r8 +(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel + br.sptk.many ia64_leave_kernel +END(dispatch_illegal_op_fault) + #ifdef CONFIG_IA32_SUPPORT /* diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 8d9a446a0d17..233434f4f88f 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = { { u, u, u }, /* 1F */ }; +/* Insert a long branch code */ +static void __kprobes set_brl_inst(void *from, void *to) +{ + s64 rel = ((s64) to - (s64) from) >> 4; + bundle_t *brl; + brl = (bundle_t *) ((u64) from & ~0xf); + brl->quad0.template = 0x05; /* [MLX](stop) */ + brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ + brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; + brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); + /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ + brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); +} + /* * In this function we check to see if the instruction * is IP relative instruction and update the kprobe @@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; } +/* Check the instruction in the slot is break */ +static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot) +{ + unsigned int major_opcode; + unsigned int template = bundle->quad0.template; + unsigned long kprobe_inst; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get Kprobe probe instruction at given slot*/ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + /* For break instruction, + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) { + /* Not a break instruction */ + return 0; + } + + /* Is a break instruction */ + return 1; +} + +/* + * In this function, we check whether the target bundle modifies IP or + * it triggers an exception. If so, it cannot be boostable. + */ +static int __kprobes can_boost(bundle_t *bundle, uint slot, + unsigned long bundle_addr) +{ + unsigned int template = bundle->quad0.template; + + do { + if (search_exception_tables(bundle_addr + slot) || + __is_ia64_break_inst(bundle, slot)) + return 0; /* exception may occur in this bundle*/ + } while ((++slot) < 3); + template &= 0x1e; + if (template >= 0x10 /* including B unit */ || + template == 0x04 /* including X unit */ || + template == 0x06) /* undefined */ + return 0; + + return 1; +} + +/* Prepare long jump bundle and disables other boosters if need */ +static void __kprobes prepare_booster(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr & ~0xFULL; + unsigned int slot = (unsigned long)p->addr & 0xf; + struct kprobe *other_kp; + + if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) { + set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1); + p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; + } + + /* disables boosters in previous slots */ + for (; addr < (unsigned long)p->addr; addr++) { + other_kp = get_kprobe((void *)addr); + if (other_kp) + other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE; + } +} + int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long) p->addr; @@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); + prepare_booster(p); + return 0; } @@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) src = &p->opcode.bundle; flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + (unsigned long)p->ainsn.insn + + sizeof(kprobe_opcode_t) * MAX_INSN_SIZE); + switch (p->ainsn.slot) { case 0: dest->quad0.slot0 = src->quad0.slot0; @@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); + free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); mutex_unlock(&kprobe_mutex); } /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction - * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust * the ip to point back to the original stack address. To set the IP address * to original stack address, handle the case where we need to fixup the * relative IP address and/or fixup branch register. @@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) if (slot == 1 && bundle_encoding[template][1] == L) slot = 2; - if (p->ainsn.inst_flag) { + if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { /* Fix relative IP address */ @@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) static int __kprobes is_ia64_break_inst(struct pt_regs *regs) { unsigned int slot = ia64_psr(regs)->ri; - unsigned int template, major_opcode; - unsigned long kprobe_inst; unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; bundle_t bundle; memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); - template = bundle.quad0.template; - - /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ - if (slot == 1 && bundle_encoding[template][1] == L) - slot++; - /* Get Kprobe probe instruction at given slot*/ - get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode); - - /* For break instruction, - * Bits 37:40 Major opcode to be zero - * Bits 27:32 X6 to be zero - * Bits 32:35 X3 to be zero - */ - if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) { - /* Not a break instruction */ - return 0; - } - - /* Is a break instruction */ - return 1; + return __is_ia64_break_inst(&bundle, slot); } static int __kprobes pre_kprobes_handler(struct die_args *args) @@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; ss_probe: +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) + if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + ia64_psr(regs)->ri = p->ainsn.slot; + regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL; + /* turn single stepping off */ + ia64_psr(regs)->ss = 0; + + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } +#endif prepare_ss(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; return 1; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6c18221dba36..705176b434b3 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -69,6 +69,7 @@ * 2007-04-27 Russ Anderson <rja@sgi.com> * Support multiple cpus going through OS_MCA in the same event. */ +#include <linux/jiffies.h> #include <linux/types.h> #include <linux/init.h> #include <linux/sched.h> @@ -97,6 +98,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> +#include <asm/tlb.h> #include "mca_drv.h" #include "entry.h" @@ -107,11 +109,26 @@ # define IA64_MCA_DEBUG(fmt...) #endif +#define NOTIFY_INIT(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +#define NOTIFY_MCA(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + /* Used by mca_asm.S */ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ +DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ unsigned long __per_cpu_mca[NR_CPUS]; @@ -293,7 +310,8 @@ static void ia64_mlogbuf_dump_from_init(void) if (mlogbuf_finished) return; - if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + if (mlogbuf_timestamp && + time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) { printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " " and the system seems to be messed up.\n"); ia64_mlogbuf_finish(0); @@ -762,9 +780,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) /* Mask all interrupts */ local_irq_save(flags); - if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; /* Register with the SAL monarch that the slave has @@ -772,17 +789,13 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) */ ia64_sal_mc_rendez(); - if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1); /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; /* Enable all interrupts */ @@ -1182,6 +1195,49 @@ all_in: return; } +/* mca_insert_tr + * + * Switch rid when TR reload and needed! + * iord: 1: itr, 2: itr; + * +*/ +static void mca_insert_tr(u64 iord) +{ + + int i; + u64 old_rr; + struct ia64_tr_entry *p; + unsigned long psr; + int cpu = smp_processor_id(); + + psr = ia64_clear_ic(); + for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { + p = &__per_cpu_idtrs[cpu][iord-1][i]; + if (p->pte & 0x1) { + old_rr = ia64_get_rr(p->ifa); + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, p->rr); + ia64_srlz_d(); + } + ia64_ptr(iord, p->ifa, p->itir >> 2); + ia64_srlz_i(); + if (iord & 0x1) { + ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (iord & 0x2) { + ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, old_rr); + ia64_srlz_d(); + } + } + } + ia64_set_psr(psr); +} + /* * ia64_mca_handler * @@ -1209,7 +1265,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, int recover, cpu = smp_processor_id(); struct task_struct *previous_current; struct ia64_mca_notify_die nd = - { .sos = sos, .monarch_cpu = &monarch_cpu }; + { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover }; static atomic_t mca_count; static cpumask_t mca_cpu; @@ -1225,9 +1281,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); - if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; if (sos->monarch) { @@ -1241,13 +1295,12 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, * does not work. */ ia64_mca_wakeup_all(); - if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); } else { while (cpu_isset(cpu, mca_cpu)) cpu_relax(); /* spin until monarch wakes us */ - } + } + + NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1); /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); @@ -1266,15 +1319,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, } else { /* Dump buffered message to console */ ia64_mlogbuf_finish(1); -#ifdef CONFIG_KEXEC - atomic_set(&kdump_in_progress, 1); - monarch_cpu = -1; -#endif } - if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + if (__get_cpu_var(ia64_mca_tr_reload)) { + mca_insert_tr(0x1); /*Reload dynamic itrs*/ + mca_insert_tr(0x2); /*Reload dynamic itrs*/ + } + + NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1); if (atomic_dec_return(&mca_count) > 0) { int i; @@ -1595,7 +1647,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); + NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0); mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); @@ -1632,17 +1684,15 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; while (monarch_cpu == -1) cpu_relax(); /* spin until monarch enters */ - if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); + while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -1651,9 +1701,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, } monarch_cpu = cpu; - if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1); /* * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be @@ -1668,12 +1716,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * to default_monarch_init_process() above and just print all the * tasks. */ - if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); @@ -1905,7 +1950,7 @@ ia64_mca_init(void) printk(KERN_INFO "Increasing MCA rendezvous timeout from " "%ld to %ld milliseconds\n", timeout, isrv.v0); timeout = isrv.v0; - (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0); + NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0); continue; } printk(KERN_ERR "Failed to register rendezvous interrupt " diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 8bc7d259e0c6..a06d46548ff9 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -219,8 +219,13 @@ ia64_reload_tr: mov r20=IA64_TR_CURRENT_STACK ;; itr.d dtr[r20]=r16 + GET_THIS_PADDR(r2, ia64_mca_tr_reload) + mov r18 = 1 ;; srlz.d + ;; + st8 [r2] =r18 + ;; done_tlb_purge_and_reload: diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index c9ac8bada786..74b6d670aaef 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -3,6 +3,21 @@ #include "entry.h" +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* read ar.itc in advance, and use it before leaving bank 0 */ +#define ACCOUNT_GET_STAMP \ +(pUStk) mov.m r20=ar.itc; +#define ACCOUNT_SYS_ENTER \ +(pUStk) br.call.spnt rp=account_sys_enter \ + ;; +#else +#define ACCOUNT_GET_STAMP +#define ACCOUNT_SYS_ENTER +#endif + +.section ".data.patch.rse", "a" +.previous + /* * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves * the minimum state necessary that allows us to turn psr.ic back @@ -28,7 +43,7 @@ * Note that psr.ic is NOT turned on by this macro. This is so that * we can pass interruption state as arguments to a handler. */ -#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ +#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND) \ mov r16=IA64_KR(CURRENT); /* M */ \ mov r27=ar.rsc; /* M */ \ mov r20=r1; /* A */ \ @@ -75,6 +90,7 @@ tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ mov r29=b0 \ ;; \ + WORKAROUND; \ adds r16=PT(R8),r1; /* initialize first base pointer */ \ adds r17=PT(R9),r1; /* initialize second base pointer */ \ (pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ @@ -122,11 +138,13 @@ ;; \ .mem.offset 0,0; st8.spill [r16]=r2,16; \ .mem.offset 8,0; st8.spill [r17]=r3,16; \ + ACCOUNT_GET_STAMP \ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ ;; \ EXTRA; \ movl r1=__gp; /* establish kernel global pointer */ \ ;; \ + ACCOUNT_SYS_ENTER \ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ ;; @@ -192,6 +210,40 @@ st8 [r25]=r10; /* ar.ssd */ \ ;; -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,) -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) -#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, ) +#define RSE_WORKAROUND \ +(pUStk) extr.u r17=r18,3,6; \ +(pUStk) sub r16=r18,r22; \ +[1:](pKStk) br.cond.sptk.many 1f; \ + .xdata4 ".data.patch.rse",1b-. \ + ;; \ + cmp.ge p6,p7 = 33,r17; \ + ;; \ +(p6) mov r17=0x310; \ +(p7) mov r17=0x308; \ + ;; \ + cmp.leu p1,p0=r16,r17; \ +(p1) br.cond.sptk.many 1f; \ + dep.z r17=r26,0,62; \ + movl r16=2f; \ + ;; \ + mov ar.pfs=r17; \ + dep r27=r0,r27,16,14; \ + mov b0=r16; \ + ;; \ + br.ret.sptk b0; \ + ;; \ +2: \ + mov ar.rsc=r0 \ + ;; \ + flushrs; \ + ;; \ + mov ar.bspstore=r22 \ + ;; \ + mov r18=ar.bsp; \ + ;; \ +1: \ + .pred.rel "mutex", pKStk, pUStk + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) +#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a78b45f5fe2f..c93420c97409 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void) for(node=0; node < MAX_NUMNODES; node++) cpus_clear(node_to_cpu_mask[node]); - for(cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_early_cpu(cpu) { node = -1; for (i = 0; i < NR_CPUS; ++i) if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 396004e8cd14..9dc00f7fe10e 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -900,12 +900,6 @@ static void palinfo_smp_call(void *info) { palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; - if (data == NULL) { - printk(KERN_ERR "palinfo: data pointer is NULL\n"); - data->ret = 0; /* no output */ - return; - } - /* does this actual call */ data->ret = (*data->func)(data->page); } @@ -1053,7 +1047,7 @@ static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block palinfo_cpu_notifier __cpuinitdata = +static struct notifier_block __refdata palinfo_cpu_notifier = { .notifier_call = palinfo_cpu_callback, .priority = 0, diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index 2cb9425e0421..b83b2c516008 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -115,6 +115,29 @@ ia64_patch_vtop (unsigned long start, unsigned long end) ia64_srlz_i(); } +/* + * Disable the RSE workaround by turning the conditional branch + * that we tagged in each place the workaround was used into an + * unconditional branch. + */ +void __init +ia64_patch_rse (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip, *b; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + b = (u64 *)(ip & -16); + b[1] &= ~0xf800000L; + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + void __init ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) { @@ -135,10 +158,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) while (offp < (s32 *) end) { wp = (u64 *) ia64_imva((char *) offp + *offp); - wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ - wp[1] = 0x0004000000000200UL; - wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ - wp[3] = 0x0084006880000200UL; + wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ + wp[1] = 0x0084006880000200UL; + wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ + wp[3] = 0x0004000000000200UL; ia64_fc(wp); ia64_fc(wp + 2); ++offp; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index a2aabfdc80d9..7714a97b0104 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -867,7 +867,7 @@ pfm_rvfree(void *mem, unsigned long size) } static pfm_context_t * -pfm_context_alloc(void) +pfm_context_alloc(int ctx_flags) { pfm_context_t *ctx; @@ -878,6 +878,46 @@ pfm_context_alloc(void) ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); if (ctx) { DPRINT(("alloc ctx @%p\n", ctx)); + + /* + * init context protection lock + */ + spin_lock_init(&ctx->ctx_lock); + + /* + * context is unloaded + */ + ctx->ctx_state = PFM_CTX_UNLOADED; + + /* + * initialization of context's flags + */ + ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; + ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; + ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; + /* + * will move to set properties + * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; + */ + + /* + * init restart semaphore to locked + */ + init_completion(&ctx->ctx_restart_done); + + /* + * activation is used in SMP only + */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* + * initialize notification message queue + */ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + init_waitqueue_head(&ctx->ctx_msgq_wait); + init_waitqueue_head(&ctx->ctx_zombieq); + } return ctx; } @@ -1824,11 +1864,6 @@ pfm_flush(struct file *filp, fl_owner_t id) * invoked after, it will find an empty queue and no * signal will be sent. In both case, we are safe */ - if (filp->f_flags & FASYNC) { - DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); - pfm_do_fasync (-1, filp, ctx, 0); - } - PROTECT_CTX(ctx, flags); state = ctx->ctx_state; @@ -1959,6 +1994,11 @@ pfm_close(struct inode *inode, struct file *filp) return -EBADF; } + if (filp->f_flags & FASYNC) { + DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); + pfm_do_fasync(-1, filp, ctx, 0); + } + PROTECT_CTX(ctx, flags); state = ctx->ctx_state; @@ -2165,28 +2205,21 @@ static struct dentry_operations pfmfs_dentry_operations = { }; -static int -pfm_alloc_fd(struct file **cfile) +static struct file * +pfm_alloc_file(pfm_context_t *ctx) { - int fd, ret = 0; - struct file *file = NULL; - struct inode * inode; + struct file *file; + struct inode *inode; + struct dentry *dentry; char name[32]; struct qstr this; - fd = get_unused_fd(); - if (fd < 0) return -ENFILE; - - ret = -ENFILE; - - file = get_empty_filp(); - if (!file) goto out; - /* * allocate a new inode */ inode = new_inode(pfmfs_mnt->mnt_sb); - if (!inode) goto out; + if (!inode) + return ERR_PTR(-ENOMEM); DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); @@ -2199,59 +2232,28 @@ pfm_alloc_fd(struct file **cfile) this.len = strlen(name); this.hash = inode->i_ino; - ret = -ENOMEM; - /* * allocate a new dcache entry */ - file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_path.dentry) goto out; + dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } - file->f_path.dentry->d_op = &pfmfs_dentry_operations; + dentry->d_op = &pfmfs_dentry_operations; + d_add(dentry, inode); - d_add(file->f_path.dentry, inode); - file->f_path.mnt = mntget(pfmfs_mnt); - file->f_mapping = inode->i_mapping; + file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops); + if (!file) { + dput(dentry); + return ERR_PTR(-ENFILE); + } - file->f_op = &pfm_file_ops; - file->f_mode = FMODE_READ; file->f_flags = O_RDONLY; - file->f_pos = 0; - - /* - * may have to delay until context is attached? - */ - fd_install(fd, file); - - /* - * the file structure we will use - */ - *cfile = file; - - return fd; -out: - if (file) put_filp(file); - put_unused_fd(fd); - return ret; -} + file->private_data = ctx; -static void -pfm_free_fd(int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - - /* - * there ie no fd_uninstall(), so we do it here - */ - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - rcu_assign_pointer(fdt->fd[fd], NULL); - spin_unlock(&files->file_lock); - - if (file) - put_filp(file); - put_unused_fd(fd); + return file; } static int @@ -2475,6 +2477,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t /* link buffer format and context */ ctx->ctx_buf_fmt = fmt; + ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ /* * check if buffer format wants to use perfmon buffer allocation/mapping service @@ -2669,78 +2672,45 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg { pfarg_context_t *req = (pfarg_context_t *)arg; struct file *filp; + struct path path; int ctx_flags; + int fd; int ret; /* let's check the arguments first */ ret = pfarg_is_sane(current, req); - if (ret < 0) return ret; + if (ret < 0) + return ret; ctx_flags = req->ctx_flags; ret = -ENOMEM; - ctx = pfm_context_alloc(); - if (!ctx) goto error; + fd = get_unused_fd(); + if (fd < 0) + return fd; - ret = pfm_alloc_fd(&filp); - if (ret < 0) goto error_file; + ctx = pfm_context_alloc(ctx_flags); + if (!ctx) + goto error; - req->ctx_fd = ctx->ctx_fd = ret; + filp = pfm_alloc_file(ctx); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto error_file; + } - /* - * attach context to file - */ - filp->private_data = ctx; + req->ctx_fd = ctx->ctx_fd = fd; /* * does the user want to sample? */ if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); - if (ret) goto buffer_error; + if (ret) + goto buffer_error; } - /* - * init context protection lock - */ - spin_lock_init(&ctx->ctx_lock); - - /* - * context is unloaded - */ - ctx->ctx_state = PFM_CTX_UNLOADED; - - /* - * initialization of context's flags - */ - ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; - ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; - ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */ - ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; - /* - * will move to set properties - * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; - */ - - /* - * init restart semaphore to locked - */ - init_completion(&ctx->ctx_restart_done); - - /* - * activation is used in SMP only - */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* - * initialize notification message queue - */ - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; - init_waitqueue_head(&ctx->ctx_msgq_wait); - init_waitqueue_head(&ctx->ctx_zombieq); - DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", ctx, ctx_flags, @@ -2755,10 +2725,14 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg */ pfm_reset_pmu_state(ctx); + fd_install(fd, filp); + return 0; buffer_error: - pfm_free_fd(ctx->ctx_fd, filp); + path = filp->f_path; + put_filp(filp); + path_put(&path); if (ctx->ctx_buf_fmt) { pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); @@ -2767,6 +2741,7 @@ error_file: pfm_context_free(ctx); error: + put_unused_fd(fd); return ret; } @@ -4204,10 +4179,10 @@ pfm_check_task_exist(pfm_context_t *ctx) do_each_thread (g, t) { if (t->thread.pfm_context == ctx) { ret = 0; - break; + goto out; } } while_each_thread (g, t); - +out: read_unlock(&tasklist_lock); DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); @@ -5038,12 +5013,13 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) } static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); + /* * pfm_handle_work() can be called with interrupts enabled * (TIF_NEED_RESCHED) or disabled. The down_interruptible * call may sleep, therefore we must re-enable interrupts * to avoid deadlocks. It is safe to do so because this function - * is called ONLY when returning to user level (PUStk=1), in which case + * is called ONLY when returning to user level (pUStk=1), in which case * there is no risk of kernel stack overflow due to deep * interrupt nesting. */ @@ -5059,7 +5035,8 @@ pfm_handle_work(void) ctx = PFM_GET_CTX(current); if (ctx == NULL) { - printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current)); + printk(KERN_ERR "perfmon: [%d] has no PFM context\n", + task_pid_nr(current)); return; } @@ -5083,11 +5060,12 @@ pfm_handle_work(void) /* * must be done before we check for simple-reset mode */ - if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie; - + if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) + goto do_zombie; //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; - if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; + if (reason == PFM_TRAP_REASON_RESET) + goto skip_blocking; /* * restore interrupt mask to what it was on entry. @@ -5135,7 +5113,8 @@ do_zombie: /* * in case of interruption of down() we don't restart anything */ - if (ret < 0) goto nothing_to_do; + if (ret < 0) + goto nothing_to_do; skip_blocking: pfm_resume_after_ovfl(ctx, ovfl_regs, regs); @@ -5511,7 +5490,7 @@ stop_monitoring: } static int -pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) { struct task_struct *task; pfm_context_t *ctx; @@ -5591,7 +5570,7 @@ pfm_interrupt_handler(int irq, void *arg) start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(arg, regs); total_cycles = ia64_get_itc(); @@ -6695,16 +6674,12 @@ pfm_init(void) /* * create /proc/perfmon (mostly for debugging purposes) */ - perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); + perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); pmu_conf = NULL; return -1; } - /* - * install customized file operations for /proc/perfmon entry - */ - perfmon_dir->proc_fops = &pfm_proc_fops; /* * create /proc/sys/kernel/perfmon (for debugging purposes) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 49937a383b23..a3a34b4eb038 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -167,11 +167,18 @@ void tsk_clear_notify_resume(struct task_struct *tsk) clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); } +/* + * do_notify_resume_user(): + * Called from notify_resume_user at entry.S, with interrupts disabled. + */ void -do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall) +do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) { if (fsys_mode(current, &scr->pt)) { - /* defer signal-handling etc. until we return to privilege-level 0. */ + /* + * defer signal-handling etc. until we return to + * privilege-level 0. + */ if (!ia64_psr(&scr->pt)->lp) ia64_psr(&scr->pt)->lp = 1; return; @@ -179,16 +186,26 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall #ifdef CONFIG_PERFMON if (current->thread.pfm_needs_checking) + /* + * Note: pfm_handle_work() allow us to call it with interrupts + * disabled, and may enable interrupts within the function. + */ pfm_handle_work(); #endif /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK)) + if (test_thread_flag(TIF_SIGPENDING)) { + local_irq_enable(); /* force interrupt enable */ ia64_do_signal(scr, in_syscall); + } /* copy user rbs to kernel rbs */ - if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) + if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { + local_irq_enable(); /* force interrupt enable */ ia64_sync_krbs(); + } + + local_irq_disable(); /* force interrupt disable */ } static int pal_halt = 1; @@ -625,21 +642,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg) do_dump_task_fpu(current, info, arg); } -int -dump_task_regs(struct task_struct *task, elf_gregset_t *regs) -{ - struct unw_frame_info tcore_info; - - if (current == task) { - unw_init_running(do_copy_regs, regs); - } else { - memset(&tcore_info, 0, sizeof(tcore_info)); - unw_init_from_blocked_task(&tcore_info, task); - do_copy_task_regs(task, &tcore_info, regs); - } - return 1; -} - void ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) { @@ -647,21 +649,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) } int -dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst) -{ - struct unw_frame_info tcore_info; - - if (current == task) { - unw_init_running(do_dump_fpu, dst); - } else { - memset(&tcore_info, 0, sizeof(tcore_info)); - unw_init_from_blocked_task(&tcore_info, task); - do_dump_task_fpu(task, &tcore_info, dst); - } - return 1; -} - -int dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) { unw_init_running(do_dump_fpu, dst); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index ab784ec4319d..2a9943b5947f 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -3,6 +3,9 @@ * * Copyright (C) 1999-2005 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2006 Intel Co + * 2006-08-12 - IA64 Native Utrace implementation support added by + * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> * * Derived from the x86 and Alpha versions. */ @@ -17,6 +20,8 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/signal.h> +#include <linux/regset.h> +#include <linux/elf.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task) psr->dfh = 1; } -static int -access_fr (struct unw_frame_info *info, int regnum, int hi, - unsigned long *data, int write_access) -{ - struct ia64_fpreg fpval; - int ret; - - ret = unw_get_fr(info, regnum, &fpval); - if (ret < 0) - return ret; - - if (write_access) { - fpval.u.bits[hi] = *data; - ret = unw_set_fr(info, regnum, fpval); - } else - *data = fpval.u.bits[hi]; - return ret; -} - /* * Change the machine-state of CHILD such that it will return via the normal * kernel exit-path, rather than the syscall-exit path. @@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt, static int access_uarea (struct task_struct *child, unsigned long addr, - unsigned long *data, int write_access) -{ - unsigned long *ptr, regnum, urbs_end, cfm; - struct switch_stack *sw; - struct pt_regs *pt; -# define pt_reg_addr(pt, reg) ((void *) \ - ((unsigned long) (pt) \ - + offsetof(struct pt_regs, reg))) - - - pt = task_pt_regs(child); - sw = (struct switch_stack *) (child->thread.ksp + 16); - - if ((addr & 0x7) != 0) { - dprintk("ptrace: unaligned register address 0x%lx\n", addr); - return -1; - } - - if (addr < PT_F127 + 16) { - /* accessing fph */ - if (write_access) - ia64_sync_fph(child); - else - ia64_flush_fph(child); - ptr = (unsigned long *) - ((unsigned long) &child->thread.fph + addr); - } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) { - /* scratch registers untouched by kernel (saved in pt_regs) */ - ptr = pt_reg_addr(pt, f10) + (addr - PT_F10); - } else if (addr >= PT_F12 && addr < PT_F15 + 16) { - /* - * Scratch registers untouched by kernel (saved in - * switch_stack). - */ - ptr = (unsigned long *) ((long) sw - + (addr - PT_NAT_BITS - 32)); - } else if (addr < PT_AR_LC + 8) { - /* preserved state: */ - struct unw_frame_info info; - char nat = 0; - int ret; - - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) - return -1; - - switch (addr) { - case PT_NAT_BITS: - return access_nat_bits(child, pt, &info, - data, write_access); - - case PT_R4: case PT_R5: case PT_R6: case PT_R7: - if (write_access) { - /* read NaT bit first: */ - unsigned long dummy; - - ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4, - &dummy, &nat); - if (ret < 0) - return ret; - } - return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data, - &nat, write_access); - - case PT_B1: case PT_B2: case PT_B3: - case PT_B4: case PT_B5: - return unw_access_br(&info, (addr - PT_B1)/8 + 1, data, - write_access); - - case PT_AR_EC: - return unw_access_ar(&info, UNW_AR_EC, data, - write_access); - - case PT_AR_LC: - return unw_access_ar(&info, UNW_AR_LC, data, - write_access); - - default: - if (addr >= PT_F2 && addr < PT_F5 + 16) - return access_fr(&info, (addr - PT_F2)/16 + 2, - (addr & 8) != 0, data, - write_access); - else if (addr >= PT_F16 && addr < PT_F31 + 16) - return access_fr(&info, - (addr - PT_F16)/16 + 16, - (addr & 8) != 0, - data, write_access); - else { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - } - } else if (addr < PT_F9+16) { - /* scratch state */ - switch (addr) { - case PT_AR_BSP: - /* - * By convention, we use PT_AR_BSP to refer to - * the end of the user-level backing store. - * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) - * to get the real value of ar.bsp at the time - * the kernel was entered. - * - * Furthermore, when changing the contents of - * PT_AR_BSP (or PT_CFM) while the task is - * blocked in a system call, convert the state - * so that the non-system-call exit - * path is used. This ensures that the proper - * state will be picked up when resuming - * execution. However, it *also* means that - * once we write PT_AR_BSP/PT_CFM, it won't be - * possible to modify the syscall arguments of - * the pending system call any longer. This - * shouldn't be an issue because modifying - * PT_AR_BSP/PT_CFM generally implies that - * we're either abandoning the pending system - * call or that we defer it's re-execution - * (e.g., due to GDB doing an inferior - * function call). - */ - urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); - if (write_access) { - if (*data != urbs_end) { - if (in_syscall(pt)) - convert_to_non_syscall(child, - pt, - cfm); - /* - * Simulate user-level write - * of ar.bsp: - */ - pt->loadrs = 0; - pt->ar_bspstore = *data; - } - } else - *data = urbs_end; - return 0; - - case PT_CFM: - urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); - if (write_access) { - if (((cfm ^ *data) & PFM_MASK) != 0) { - if (in_syscall(pt)) - convert_to_non_syscall(child, - pt, - cfm); - pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) - | (*data & PFM_MASK)); - } - } else - *data = cfm; - return 0; - - case PT_CR_IPSR: - if (write_access) { - unsigned long tmp = *data; - /* psr.ri==3 is a reserved value: SDM 2:25 */ - if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) - tmp &= ~IA64_PSR_RI; - pt->cr_ipsr = ((tmp & IPSR_MASK) - | (pt->cr_ipsr & ~IPSR_MASK)); - } else - *data = (pt->cr_ipsr & IPSR_MASK); - return 0; - - case PT_AR_RSC: - if (write_access) - pt->ar_rsc = *data | (3 << 2); /* force PL3 */ - else - *data = pt->ar_rsc; - return 0; - - case PT_AR_RNAT: - ptr = pt_reg_addr(pt, ar_rnat); - break; - case PT_R1: - ptr = pt_reg_addr(pt, r1); - break; - case PT_R2: case PT_R3: - ptr = pt_reg_addr(pt, r2) + (addr - PT_R2); - break; - case PT_R8: case PT_R9: case PT_R10: case PT_R11: - ptr = pt_reg_addr(pt, r8) + (addr - PT_R8); - break; - case PT_R12: case PT_R13: - ptr = pt_reg_addr(pt, r12) + (addr - PT_R12); - break; - case PT_R14: - ptr = pt_reg_addr(pt, r14); - break; - case PT_R15: - ptr = pt_reg_addr(pt, r15); - break; - case PT_R16: case PT_R17: case PT_R18: case PT_R19: - case PT_R20: case PT_R21: case PT_R22: case PT_R23: - case PT_R24: case PT_R25: case PT_R26: case PT_R27: - case PT_R28: case PT_R29: case PT_R30: case PT_R31: - ptr = pt_reg_addr(pt, r16) + (addr - PT_R16); - break; - case PT_B0: - ptr = pt_reg_addr(pt, b0); - break; - case PT_B6: - ptr = pt_reg_addr(pt, b6); - break; - case PT_B7: - ptr = pt_reg_addr(pt, b7); - break; - case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8: - case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8: - ptr = pt_reg_addr(pt, f6) + (addr - PT_F6); - break; - case PT_AR_BSPSTORE: - ptr = pt_reg_addr(pt, ar_bspstore); - break; - case PT_AR_UNAT: - ptr = pt_reg_addr(pt, ar_unat); - break; - case PT_AR_PFS: - ptr = pt_reg_addr(pt, ar_pfs); - break; - case PT_AR_CCV: - ptr = pt_reg_addr(pt, ar_ccv); - break; - case PT_AR_FPSR: - ptr = pt_reg_addr(pt, ar_fpsr); - break; - case PT_CR_IIP: - ptr = pt_reg_addr(pt, cr_iip); - break; - case PT_PR: - ptr = pt_reg_addr(pt, pr); - break; - /* scratch register */ - - default: - /* disallow accessing anything else... */ - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - } else if (addr <= PT_AR_SSD) { - ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD); - } else { - /* access debug registers */ - - if (addr >= PT_IBR) { - regnum = (addr - PT_IBR) >> 3; - ptr = &child->thread.ibr[0]; - } else { - regnum = (addr - PT_DBR) >> 3; - ptr = &child->thread.dbr[0]; - } - - if (regnum >= 8) { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } -#ifdef CONFIG_PERFMON - /* - * Check if debug registers are used by perfmon. This - * test must be done once we know that we can do the - * operation, i.e. the arguments are all valid, but - * before we start modifying the state. - * - * Perfmon needs to keep a count of how many processes - * are trying to modify the debug registers for system - * wide monitoring sessions. - * - * We also include read access here, because they may - * cause the PMU-installed debug register state - * (dbr[], ibr[]) to be reset. The two arrays are also - * used by perfmon, but we do not use - * IA64_THREAD_DBG_VALID. The registers are restored - * by the PMU context switch code. - */ - if (pfm_use_debug_registers(child)) return -1; -#endif - - if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { - child->thread.flags |= IA64_THREAD_DBG_VALID; - memset(child->thread.dbr, 0, - sizeof(child->thread.dbr)); - memset(child->thread.ibr, 0, - sizeof(child->thread.ibr)); - } - - ptr += regnum; - - if ((regnum & 1) && write_access) { - /* don't let the user set kernel-level breakpoints: */ - *ptr = *data & ~(7UL << 56); - return 0; - } - } - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; -} + unsigned long *data, int write_access); static long ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) @@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, if (test_thread_flag(TIF_RESTORE_RSE)) ia64_sync_krbs(); } + +/* Utrace implementation starts here */ +struct regset_get { + void *kbuf; + void __user *ubuf; +}; + +struct regset_set { + const void *kbuf; + const void __user *ubuf; +}; + +struct regset_getset { + struct task_struct *target; + const struct user_regset *regset; + union { + struct regset_get get; + struct regset_set set; + } u; + unsigned int pos; + unsigned int count; + int ret; +}; + +static int +access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + int ret; + char nat = 0; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_GR_OFFSET(1): + ptr = &pt->r1; + break; + case ELF_GR_OFFSET(2): + case ELF_GR_OFFSET(3): + ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2)); + break; + case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7): + if (write_access) { + /* read NaT bit first: */ + unsigned long dummy; + + ret = unw_get_gr(info, addr/8, &dummy, &nat); + if (ret < 0) + return ret; + } + return unw_access_gr(info, addr/8, data, &nat, write_access); + case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11): + ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8); + break; + case ELF_GR_OFFSET(12): + case ELF_GR_OFFSET(13): + ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12); + break; + case ELF_GR_OFFSET(14): + ptr = &pt->r14; + break; + case ELF_GR_OFFSET(15): + ptr = &pt->r15; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_breg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_BR_OFFSET(0): + ptr = &pt->b0; + break; + case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5): + return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8, + data, write_access); + case ELF_BR_OFFSET(6): + ptr = &pt->b6; + break; + case ELF_BR_OFFSET(7): + ptr = &pt->b7; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_areg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long cfm, urbs_end; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) { + switch (addr) { + case ELF_AR_RSC_OFFSET: + /* force PL3 */ + if (write_access) + pt->ar_rsc = *data | (3 << 2); + else + *data = pt->ar_rsc; + return 0; + case ELF_AR_BSP_OFFSET: + /* + * By convention, we use PT_AR_BSP to refer to + * the end of the user-level backing store. + * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) + * to get the real value of ar.bsp at the time + * the kernel was entered. + * + * Furthermore, when changing the contents of + * PT_AR_BSP (or PT_CFM) while the task is + * blocked in a system call, convert the state + * so that the non-system-call exit + * path is used. This ensures that the proper + * state will be picked up when resuming + * execution. However, it *also* means that + * once we write PT_AR_BSP/PT_CFM, it won't be + * possible to modify the syscall arguments of + * the pending system call any longer. This + * shouldn't be an issue because modifying + * PT_AR_BSP/PT_CFM generally implies that + * we're either abandoning the pending system + * call or that we defer it's re-execution + * (e.g., due to GDB doing an inferior + * function call). + */ + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (*data != urbs_end) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + /* + * Simulate user-level write + * of ar.bsp: + */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; + case ELF_AR_BSPSTORE_OFFSET: + ptr = &pt->ar_bspstore; + break; + case ELF_AR_RNAT_OFFSET: + ptr = &pt->ar_rnat; + break; + case ELF_AR_CCV_OFFSET: + ptr = &pt->ar_ccv; + break; + case ELF_AR_UNAT_OFFSET: + ptr = &pt->ar_unat; + break; + case ELF_AR_FPSR_OFFSET: + ptr = &pt->ar_fpsr; + break; + case ELF_AR_PFS_OFFSET: + ptr = &pt->ar_pfs; + break; + case ELF_AR_LC_OFFSET: + return unw_access_ar(info, UNW_AR_LC, data, + write_access); + case ELF_AR_EC_OFFSET: + return unw_access_ar(info, UNW_AR_EC, data, + write_access); + case ELF_AR_CSD_OFFSET: + ptr = &pt->ar_csd; + break; + case ELF_AR_SSD_OFFSET: + ptr = &pt->ar_ssd; + } + } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) { + switch (addr) { + case ELF_CR_IIP_OFFSET: + ptr = &pt->cr_iip; + break; + case ELF_CFM_OFFSET: + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (((cfm ^ *data) & PFM_MASK) != 0) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) + | (*data & PFM_MASK)); + } + } else + *data = cfm; + return 0; + case ELF_CR_IPSR_OFFSET: + if (write_access) { + unsigned long tmp = *data; + /* psr.ri==3 is a reserved value: SDM 2:25 */ + if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) + tmp &= ~IA64_PSR_RI; + pt->cr_ipsr = ((tmp & IPSR_MASK) + | (pt->cr_ipsr & ~IPSR_MASK)); + } else + *data = (pt->cr_ipsr & IPSR_MASK); + return 0; + } + } else if (addr == ELF_NAT_OFFSET) + return access_nat_bits(target, pt, info, + data, write_access); + else if (addr == ELF_PR_OFFSET) + ptr = &pt->pr; + else + return -1; + + if (write_access) + *ptr = *data; + else + *data = *ptr; + + return 0; +} + +static int +access_elf_reg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15)) + return access_elf_gpreg(target, info, addr, data, write_access); + else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) + return access_elf_breg(target, info, addr, data, write_access); + else + return access_elf_areg(target, info, addr, data, write_access); +} + +void do_gpregs_get(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index, min_copy; + + if (unw_unwind_to_user(info) < 0) + return; + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1 - gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_GR_OFFSET(16); + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret || dst->count == 0) + return; + } + + /* r16-r31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_CR_IIP_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_AR_END_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + } +} + +void do_gpregs_set(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1-gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + i = dst->pos; + index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* gr16-gr31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + i = dst->pos; + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret) + return; + for (; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + i = dst->pos; + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + } +} + +#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t)) + +void do_fpregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + elf_fpreg_t tmp[30]; + int index, min_copy, i; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t); + + min_copy = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t), + index++) + if (unw_get_fr(info, i / sizeof(elf_fpreg_t), + &tmp[index])) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fph */ + if (dst->count > 0) { + ia64_flush_fph(dst->target); + if (task->thread.flags & IA64_THREAD_FPH_VALID) + dst->ret = user_regset_copyout( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + else + /* Zero fill instead. */ + dst->ret = user_regset_copyout_zero( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + ELF_FP_OFFSET(32), -1); + } +} + +void do_fpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + elf_fpreg_t fpreg, tmp[30]; + int index, start, end; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + start = dst->pos; + end = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->ret) + return; + + if (start & 0xF) { /* only write high part */ + if (unw_get_fr(info, start / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0] + = fpreg.u.bits[0]; + start &= ~0xFUL; + } + if (end & 0xF) { /* only write low part */ + if (unw_get_fr(info, end / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1] + = fpreg.u.bits[1]; + end = (end + 0xF) & ~0xFUL; + } + + for ( ; start < end ; start += sizeof(elf_fpreg_t)) { + index = start / sizeof(elf_fpreg_t); + if (unw_set_fr(info, index, tmp[index - 2])) { + dst->ret = -EIO; + return; + } + } + if (dst->ret || dst->count == 0) + return; + } + + /* fph */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) { + ia64_sync_fph(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + } +} + +static int +do_regset_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct regset_getset info = { .target = target, .regset = regset, + .pos = pos, .count = count, + .u.set = { .kbuf = kbuf, .ubuf = ubuf }, + .ret = 0 }; + + if (target == current) + unw_init_running(call, &info); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, target); + (*call)(&ufi, &info); + } + + return info.ret; +} + +static int +gpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_gpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int gpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_gpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static void do_gpregs_writeback(struct unw_frame_info *info, void *arg) +{ + do_sync_rbs(info, ia64_sync_user_rbs); +} + +/* + * This is called to write back the register backing store. + * ptrace does this before it stops, so that a tracer reading the user + * memory after the thread stops will get the current register data. + */ +static int +gpregs_writeback(struct task_struct *target, + const struct user_regset *regset, + int now) +{ + if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) + return 0; + tsk_set_notify_resume(target); + return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, + NULL, NULL); +} + +static int +fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32; +} + +static int fpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_fpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int +access_uarea(struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access) +{ + unsigned int pos = -1; /* an invalid value */ + int ret; + unsigned long *ptr, regnum; + + if ((addr & 0x7) != 0) { + dprintk("ptrace: unaligned register address 0x%lx\n", addr); + return -1; + } + if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) || + (addr >= PT_R7 + 8 && addr < PT_B1) || + (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) || + (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + + switch (addr) { + case PT_F32 ... (PT_F127 + 15): + pos = addr - PT_F32 + ELF_FP_OFFSET(32); + break; + case PT_F2 ... (PT_F5 + 15): + pos = addr - PT_F2 + ELF_FP_OFFSET(2); + break; + case PT_F10 ... (PT_F31 + 15): + pos = addr - PT_F10 + ELF_FP_OFFSET(10); + break; + case PT_F6 ... (PT_F9 + 15): + pos = addr - PT_F6 + ELF_FP_OFFSET(6); + break; + } + + if (pos != -1) { + if (write_access) + ret = fpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = fpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + switch (addr) { + case PT_NAT_BITS: + pos = ELF_NAT_OFFSET; + break; + case PT_R4 ... PT_R7: + pos = addr - PT_R4 + ELF_GR_OFFSET(4); + break; + case PT_B1 ... PT_B5: + pos = addr - PT_B1 + ELF_BR_OFFSET(1); + break; + case PT_AR_EC: + pos = ELF_AR_EC_OFFSET; + break; + case PT_AR_LC: + pos = ELF_AR_LC_OFFSET; + break; + case PT_CR_IPSR: + pos = ELF_CR_IPSR_OFFSET; + break; + case PT_CR_IIP: + pos = ELF_CR_IIP_OFFSET; + break; + case PT_CFM: + pos = ELF_CFM_OFFSET; + break; + case PT_AR_UNAT: + pos = ELF_AR_UNAT_OFFSET; + break; + case PT_AR_PFS: + pos = ELF_AR_PFS_OFFSET; + break; + case PT_AR_RSC: + pos = ELF_AR_RSC_OFFSET; + break; + case PT_AR_RNAT: + pos = ELF_AR_RNAT_OFFSET; + break; + case PT_AR_BSPSTORE: + pos = ELF_AR_BSPSTORE_OFFSET; + break; + case PT_PR: + pos = ELF_PR_OFFSET; + break; + case PT_B6: + pos = ELF_BR_OFFSET(6); + break; + case PT_AR_BSP: + pos = ELF_AR_BSP_OFFSET; + break; + case PT_R1 ... PT_R3: + pos = addr - PT_R1 + ELF_GR_OFFSET(1); + break; + case PT_R12 ... PT_R15: + pos = addr - PT_R12 + ELF_GR_OFFSET(12); + break; + case PT_R8 ... PT_R11: + pos = addr - PT_R8 + ELF_GR_OFFSET(8); + break; + case PT_R16 ... PT_R31: + pos = addr - PT_R16 + ELF_GR_OFFSET(16); + break; + case PT_AR_CCV: + pos = ELF_AR_CCV_OFFSET; + break; + case PT_AR_FPSR: + pos = ELF_AR_FPSR_OFFSET; + break; + case PT_B0: + pos = ELF_BR_OFFSET(0); + break; + case PT_B7: + pos = ELF_BR_OFFSET(7); + break; + case PT_AR_CSD: + pos = ELF_AR_CSD_OFFSET; + break; + case PT_AR_SSD: + pos = ELF_AR_SSD_OFFSET; + break; + } + + if (pos != -1) { + if (write_access) + ret = gpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = gpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + /* access debug registers */ + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + ptr = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + ptr = &child->thread.dbr[0]; + } + + if (regnum >= 8) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } +#ifdef CONFIG_PERFMON + /* + * Check if debug registers are used by perfmon. This + * test must be done once we know that we can do the + * operation, i.e. the arguments are all valid, but + * before we start modifying the state. + * + * Perfmon needs to keep a count of how many processes + * are trying to modify the debug registers for system + * wide monitoring sessions. + * + * We also include read access here, because they may + * cause the PMU-installed debug register state + * (dbr[], ibr[]) to be reset. The two arrays are also + * used by perfmon, but we do not use + * IA64_THREAD_DBG_VALID. The registers are restored + * by the PMU context switch code. + */ + if (pfm_use_debug_registers(child)) + return -1; +#endif + + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(child->thread.dbr, 0, + sizeof(child->thread.dbr)); + memset(child->thread.ibr, 0, + sizeof(child->thread.ibr)); + } + + ptr += regnum; + + if ((regnum & 1) && write_access) { + /* don't let the user set kernel-level breakpoints: */ + *ptr = *data & ~(7UL << 56); + return 0; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static const struct user_regset native_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), + .get = gpregs_get, .set = gpregs_set, + .writeback = gpregs_writeback + }, + { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), + .get = fpregs_get, .set = fpregs_set, .active = fpregs_active + }, +}; + +static const struct user_regset_view user_ia64_view = { + .name = "ia64", + .e_machine = EM_IA_64, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) +{ +#ifdef CONFIG_IA32_SUPPORT + extern const struct user_regset_view user_ia32_view; + if (IS_IA32_PROCESS(task_pt_regs(tsk))) + return &user_ia32_view; +#endif + return &user_ia64_view; +} diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index a3022dc48ef8..0464173ea568 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -229,6 +229,14 @@ static void __init sal_desc_ap_wakeup(void *p) { } */ static int sal_cache_flush_drops_interrupts; +static int __init +force_pal_cache_flush(char *str) +{ + sal_cache_flush_drops_interrupts = 1; + return 0; +} +early_param("force_pal_cache_flush", force_pal_cache_flush); + void __init check_sal_cache_flush (void) { @@ -237,15 +245,17 @@ check_sal_cache_flush (void) u64 vector, cache_type = 3; struct ia64_sal_retval isrv; + if (sal_cache_flush_drops_interrupts) + return; + cpu = get_cpu(); local_irq_save(flags); /* - * Schedule a timer interrupt, wait until it's reported, and see if - * SAL_CACHE_FLUSH drops it. + * Send ourselves a timer interrupt, wait until it's reported, and see + * if SAL_CACHE_FLUSH drops it. */ - ia64_set_itv(IA64_TIMER_VECTOR); - ia64_set_itm(ia64_get_itc() + 1000); + platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0); while (!ia64_get_irr(IA64_TIMER_VECTOR)) cpu_relax(); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 779c3cca206c..ecb9eb78d687 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -44,8 +44,8 @@ #include <linux/smp.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/semaphore.h> -#include <asm/semaphore.h> #include <asm/sal.h> #include <asm/uaccess.h> @@ -648,18 +648,16 @@ salinfo_init(void) if (!dir) continue; - entry = create_proc_entry("event", S_IRUSR, dir); + entry = proc_create_data("event", S_IRUSR, dir, + &salinfo_event_fops, data); if (!entry) continue; - entry->data = data; - entry->proc_fops = &salinfo_event_fops; *sdir++ = entry; - entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir); + entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, + &salinfo_data_fops, data); if (!entry) continue; - entry->data = data; - entry->proc_fops = &salinfo_data_fops; *sdir++ = entry; /* we missed any events before now */ diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c deleted file mode 100644 index 2724ef3fbae2..000000000000 --- a/arch/ia64/kernel/semaphore.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * IA-64 semaphore implementation (derived from x86 version). - * - * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - -/* - * Semaphores are implemented using a two-way counter: The "count" - * variable is decremented for each process that tries to acquire the - * semaphore, while the "sleepers" variable is a count of such - * acquires. - * - * Notably, the inline "up()" and "down()" functions can efficiently - * test if they need to do any extra work (up needs to do something - * only if count was negative before the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is contention - * on the lock, and as such all this is the "non-critical" part of the - * whole semaphore business. The critical part is the inline stuff in - * <asm/semaphore.h> where we want to avoid any extra jumps and calls. - */ -#include <linux/sched.h> -#include <linux/init.h> - -#include <asm/errno.h> -#include <asm/semaphore.h> - -/* - * Logic: - * - Only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - When we go from a non-negative count to a negative do we - * (a) synchronize with the "sleepers" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -void -__up (struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -void __sched __down (struct semaphore *sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -int __sched __down_interruptible (struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers ++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for having decremented the - * count. - */ -int -__down_trylock (struct semaphore *sem) -{ - unsigned long flags; - int sleepers; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4aa9eaea76c3..4ae15c8c2488 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -59,6 +59,7 @@ #include <asm/setup.h> #include <asm/smp.h> #include <asm/system.h> +#include <asm/tlbflush.h> #include <asm/unistd.h> #include <asm/hpsim.h> @@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) return 0; } +/* + * Similar to "filter_rsvd_memory()", but the reserved memory ranges + * are not filtered out. + */ +int __init +filter_memory(unsigned long start, unsigned long end, void *arg) +{ + void (*func)(unsigned long, unsigned long, int); + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + func = arg; + if (start < end) + call_pernode_memory(__pa(start), end - start, func); + return 0; +} + static void __init sort_regions (struct rsvd_region *rsvd_region, int max) { @@ -215,6 +239,25 @@ __initcall(register_memory); #ifdef CONFIG_KEXEC + +/* + * This function checks if the reserved crashkernel is allowed on the specific + * IA64 machine flavour. Machines without an IO TLB use swiotlb and require + * some memory below 4 GB (i.e. in 32 bit area), see the implementation of + * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that + * in kdump case. See the comment in sba_init() in sba_iommu.c. + * + * So, the only machvec that really supports loading the kdump kernel + * over 4 GB is "sn2". + */ +static int __init check_crashkernel_memory(unsigned long pbase, size_t size) +{ + if (ia64_platform_is("sn2") || ia64_platform_is("uv")) + return 1; + else + return pbase < (1UL << 32); +} + static void __init setup_crashkernel(unsigned long total, int *n) { unsigned long long base = 0, size = 0; @@ -228,6 +271,16 @@ static void __init setup_crashkernel(unsigned long total, int *n) base = kdump_find_rsvd_region(size, rsvd_region, *n); } + + if (!check_crashkernel_memory(base, size)) { + pr_warning("crashkernel: There would be kdump memory " + "at %ld GB but this is unusable because it " + "must\nbe below 4 GB. Change the memory " + "configuration of the machine.\n", + (unsigned long)(base >> 30)); + return; + } + if (base != ~0UL) { printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", @@ -493,6 +546,8 @@ setup_arch (char **cmdline_p) acpi_table_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); + per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? + 32 : cpus_weight(early_cpu_possible_map)), additional_cpus); # endif #else # ifdef CONFIG_SMP @@ -505,6 +560,17 @@ setup_arch (char **cmdline_p) /* process SAL system table: */ ia64_sal_init(__va(efi.sal_systab)); +#ifdef CONFIG_ITANIUM + ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); +#else + { + u64 num_phys_stacked; + + if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96) + ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); + } +#endif + #ifdef CONFIG_SMP cpu_physical_id(0) = hard_smp_processor_id(); #endif @@ -512,8 +578,6 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ mmu_context_init(); /* initialize context_id bitmap */ - check_sal_cache_flush(); - #ifdef CONFIG_ACPI acpi_boot_init(); #endif @@ -541,6 +605,7 @@ setup_arch (char **cmdline_p) ia64_mca_init(); platform_setup(cmdline_p); + check_sal_cache_flush(); paging_init(); } @@ -946,9 +1011,10 @@ cpu_init (void) #endif /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ - if (ia64_pal_vm_summary(NULL, &vmi) == 0) + if (ia64_pal_vm_summary(NULL, &vmi) == 0) { max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; - else { + setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL); + } else { printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); max_ctx = (1U << 15) - 1; /* use architected minimum */ } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 5740296c35af..19c5a78636fc 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -464,7 +464,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) if (!user_mode(&scr->pt)) return; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; @@ -530,12 +530,13 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * continue to iterate in this loop so we can deliver the SIGSEGV... */ if (handle_signal(signr, &ka, &info, oldset, scr)) { - /* a signal was successfully delivered; the saved + /* + * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; return; } } @@ -566,8 +567,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) /* if there's no signal to deliver, we just put the saved sigmask * back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 4e446aa5f4ac..983296f1c813 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -98,8 +98,33 @@ unlock_ipi_calllock(void) spin_unlock_irq(&call_lock); } +static inline void +handle_call_data(void) +{ + struct call_data_struct *data; + void (*func)(void *info); + void *info; + int wait; + + /* release the 'pointer lock' */ + data = (struct call_data_struct *)call_data; + func = data->func; + info = data->info; + wait = data->wait; + + mb(); + atomic_inc(&data->started); + /* At this point the structure may be gone unless wait is true. */ + (*func)(info); + + /* Notify the sending CPU that the task is done. */ + mb(); + if (wait) + atomic_inc(&data->finished); +} + static void -stop_this_cpu (void) +stop_this_cpu(void) { /* * Remove this CPU: @@ -138,44 +163,21 @@ handle_IPI (int irq, void *dev_id) ops &= ~(1 << which); switch (which) { - case IPI_CALL_FUNC: - { - struct call_data_struct *data; - void (*func)(void *info); - void *info; - int wait; - - /* release the 'pointer lock' */ - data = (struct call_data_struct *) call_data; - func = data->func; - info = data->info; - wait = data->wait; - - mb(); - atomic_inc(&data->started); - /* - * At this point the structure may be gone unless - * wait is true. - */ - (*func)(info); - - /* Notify the sending CPU that the task is done. */ - mb(); - if (wait) - atomic_inc(&data->finished); - } - break; - - case IPI_CPU_STOP: + case IPI_CALL_FUNC: + handle_call_data(); + break; + + case IPI_CPU_STOP: stop_this_cpu(); break; #ifdef CONFIG_KEXEC - case IPI_KDUMP_CPU_STOP: + case IPI_KDUMP_CPU_STOP: unw_init_running(kdump_cpu_freeze, NULL); break; #endif - default: - printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", + this_cpu, which); break; } } while (ops); @@ -213,6 +215,19 @@ send_IPI_allbutself (int op) * Called with preemption disabled. */ static inline void +send_IPI_mask(cpumask_t mask, int op) +{ + unsigned int cpu; + + for_each_cpu_mask(cpu, mask) { + send_IPI_single(cpu, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void send_IPI_all (int op) { int i; @@ -401,6 +416,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int } EXPORT_SYMBOL(smp_call_function_single); +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * <mask> The set of cpus to run on. Must not include the current cpu. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <wait> If true, wait (atomically) until function + * has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + struct call_data_struct data; + cpumask_t allbutself; + int cpus; + + spin_lock(&call_lock); + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/ + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(IPI_CALL_FUNC); + else + send_IPI_mask(mask, IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + call_data = NULL; + + spin_unlock(&call_lock); + return 0; + +} +EXPORT_SYMBOL(smp_call_function_mask); + /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 32ee5979a042..d7ad42b77d41 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -400,9 +400,9 @@ smp_callin (void) /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); cpu_set(cpuid, cpu_online_map); - unlock_ipi_calllock(); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); + unlock_ipi_calllock(); smp_setup_percpu_timer(); @@ -873,7 +873,8 @@ identify_siblings(struct cpuinfo_ia64 *c) u16 pltid; pal_logical_to_physical_t info; - if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) { + status = ia64_pal_logical_to_phys(-1, &info); + if (status != PAL_STATUS_SUCCESS) { if (status != PAL_STATUS_UNIMPLEMENTED) { printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", @@ -885,8 +886,13 @@ identify_siblings(struct cpuinfo_ia64 *c) info.overview_cpp = 1; info.overview_tpc = 1; } - if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) { - printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status); + + status = ia64_sal_physical_id_info(&pltid); + if (status != PAL_STATUS_SUCCESS) { + if (status != PAL_STATUS_UNIMPLEMENTED) + printk(KERN_ERR + "ia64_sal_pltid failed with %ld\n", + status); return; } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 17fda5293c67..8c73643f2d66 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + +#include <linux/kernel_stat.h> + +extern cputime_t cycle_to_cputime(u64 cyc); + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +{ + struct thread_info *pi = task_thread_info(prev); + struct thread_info *ni = task_thread_info(next); + cputime_t delta_stime, delta_utime; + __u64 now; + + now = ia64_get_itc(); + + delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); + account_system_time(prev, 0, delta_stime); + account_system_time_scaled(prev, delta_stime); + + if (pi->ac_utime) { + delta_utime = cycle_to_cputime(pi->ac_utime); + account_user_time(prev, delta_utime); + account_user_time_scaled(prev, delta_utime); + } + + pi->ac_stamp = ni->ac_stamp = now; + ni->ac_stime = ni->ac_utime = 0; +} + +/* + * Account time for a transition between system, hard irq or soft irq state. + * Note that this function is called with interrupts enabled. + */ +void account_system_vtime(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + unsigned long flags; + cputime_t delta_stime; + __u64 now; + + local_irq_save(flags); + + now = ia64_get_itc(); + + delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); + account_system_time(tsk, 0, delta_stime); + account_system_time_scaled(tsk, delta_stime); + ti->ac_stime = 0; + + ti->ac_stamp = now; + + local_irq_restore(flags); +} + +/* + * Called from the timer interrupt handler to charge accumulated user time + * to the current process. Must be called with interrupts disabled. + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + struct thread_info *ti = task_thread_info(p); + cputime_t delta_utime; + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(p, delta_utime); + account_user_time_scaled(p, delta_utime); + ti->ac_utime = 0; + } +} + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static irqreturn_t timer_interrupt (int irq, void *dev_id) { @@ -301,11 +379,6 @@ static struct irqaction timer_irqaction = { .name = "timer" }; -void __devinit ia64_disable_timer(void) -{ - ia64_set_itv(1 << 16); -} - void __init time_init (void) { diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index a2484fc1a06c..26228e2d01ae 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -27,9 +27,20 @@ static struct ia64_cpu *sysfs_cpus; -int arch_register_cpu(int num) +void arch_fix_phys_package_id(int num, u32 slot) { -#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) +#ifdef CONFIG_SMP + if (cpu_data(num)->socket_id == -1) + cpu_data(num)->socket_id = slot; +#endif +} +EXPORT_SYMBOL_GPL(arch_fix_phys_package_id); + + +#ifdef CONFIG_HOTPLUG_CPU +int __ref arch_register_cpu(int num) +{ +#ifdef CONFIG_ACPI /* * If CPEI can be re-targetted or if this is not * CPEI target, then it is hotpluggable @@ -38,19 +49,21 @@ int arch_register_cpu(int num) sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif - return register_cpu(&sysfs_cpus[num].cpu, num); } - -#ifdef CONFIG_HOTPLUG_CPU +EXPORT_SYMBOL(arch_register_cpu); void arch_unregister_cpu(int num) { unregister_cpu(&sysfs_cpus[num].cpu); unmap_cpu_from_node(num, cpu_to_node(num)); } -EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); +#else +static int __init arch_register_cpu(int num) +{ + return register_cpu(&sysfs_cpus[num].cpu, num); +} #endif /*CONFIG_HOTPLUG_CPU*/ diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 6903361d11a5..ff0e7c10faa7 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -13,6 +13,7 @@ * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. * 2001/01/17 Add support emulation of unaligned kernel accesses. */ +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/tty.h> @@ -1290,7 +1291,7 @@ within_logging_rate_limit (void) { static unsigned long count, last_time; - if (jiffies - last_time > 5*HZ) + if (time_after(jiffies, last_time + 5 * HZ)) count = 0; if (count < 5) { last_time = jiffies; diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 2a90c32024f4..e77995a6e3ed 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -177,12 +177,13 @@ failed: * uncached_alloc_page * * @starting_nid: node id of node to start with, or -1 + * @n_pages: number of contiguous pages to allocate * - * Allocate 1 uncached page. Allocates on the requested node. If no - * uncached pages are available on the requested node, roundrobin starting - * with the next higher node. + * Allocate the specified number of contiguous uncached pages on the + * the requested node. If not enough contiguous uncached pages are available + * on the requested node, roundrobin starting with the next higher node. */ -unsigned long uncached_alloc_page(int starting_nid) +unsigned long uncached_alloc_page(int starting_nid, int n_pages) { unsigned long uc_addr; struct uncached_pool *uc_pool; @@ -202,7 +203,8 @@ unsigned long uncached_alloc_page(int starting_nid) if (uc_pool->pool == NULL) continue; do { - uc_addr = gen_pool_alloc(uc_pool->pool, PAGE_SIZE); + uc_addr = gen_pool_alloc(uc_pool->pool, + n_pages * PAGE_SIZE); if (uc_addr != 0) return uc_addr; } while (uncached_add_chunk(uc_pool, nid) == 0); @@ -217,11 +219,12 @@ EXPORT_SYMBOL(uncached_alloc_page); /* * uncached_free_page * - * @uc_addr: uncached address of page to free + * @uc_addr: uncached address of first page to free + * @n_pages: number of contiguous pages to free * - * Free a single uncached page. + * Free the specified number of uncached pages. */ -void uncached_free_page(unsigned long uc_addr) +void uncached_free_page(unsigned long uc_addr, int n_pages) { int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); struct gen_pool *pool = uncached_pools[nid].pool; @@ -232,7 +235,7 @@ void uncached_free_page(unsigned long uc_addr) if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) panic("uncached_free_page invalid address %lx\n", uc_addr); - gen_pool_free(pool, uc_addr, PAGE_SIZE); + gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE); } EXPORT_SYMBOL(uncached_free_page); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 80622acc95de..5929ab10a289 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -156,6 +156,13 @@ SECTIONS __end___vtop_patchlist = .; } + .data.patch.rse : AT(ADDR(.data.patch.rse) - LOAD_OFFSET) + { + __start___rse_patchlist = .; + *(.data.patch.rse) + __end___rse_patchlist = .; + } + .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) { __start___mckinley_e9_bundles = .; |