summaryrefslogtreecommitdiff
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/Makefile2
-rw-r--r--arch/ia64/kernel/acpi.c25
-rw-r--r--arch/ia64/kernel/asm-offsets.c20
-rw-r--r--arch/ia64/kernel/crash.c56
-rw-r--r--arch/ia64/kernel/efi.c46
-rw-r--r--arch/ia64/kernel/entry.S92
-rw-r--r--arch/ia64/kernel/fsys.S88
-rw-r--r--arch/ia64/kernel/head.S20
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c6
-rw-r--r--arch/ia64/kernel/init_task.c1
-rw-r--r--arch/ia64/kernel/iosapic.c2
-rw-r--r--arch/ia64/kernel/irq.c4
-rw-r--r--arch/ia64/kernel/irq_ia64.c2
-rw-r--r--arch/ia64/kernel/ivt.S147
-rw-r--r--arch/ia64/kernel/kprobes.c133
-rw-r--r--arch/ia64/kernel/mca.c135
-rw-r--r--arch/ia64/kernel/mca_asm.S5
-rw-r--r--arch/ia64/kernel/minstate.h60
-rw-r--r--arch/ia64/kernel/numa.c2
-rw-r--r--arch/ia64/kernel/palinfo.c8
-rw-r--r--arch/ia64/kernel/patch.c31
-rw-r--r--arch/ia64/kernel/perfmon.c239
-rw-r--r--arch/ia64/kernel/process.c55
-rw-r--r--arch/ia64/kernel/ptrace.c1217
-rw-r--r--arch/ia64/kernel/sal.c18
-rw-r--r--arch/ia64/kernel/salinfo.c12
-rw-r--r--arch/ia64/kernel/semaphore.c165
-rw-r--r--arch/ia64/kernel/setup.c74
-rw-r--r--arch/ia64/kernel/signal.c15
-rw-r--r--arch/ia64/kernel/smp.c150
-rw-r--r--arch/ia64/kernel/smpboot.c14
-rw-r--r--arch/ia64/kernel/time.c83
-rw-r--r--arch/ia64/kernel/topology.c25
-rw-r--r--arch/ia64/kernel/unaligned.c3
-rw-r--r--arch/ia64/kernel/uncached.c23
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S7
36 files changed, 2051 insertions, 934 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 33e5a598672d..13fd10e8699e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 78f28d825f30..43687cc60dfb 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -117,7 +117,10 @@ acpi_get_sysname(void)
if (!strcmp(hdr->oem_id, "HP")) {
return "hpzx1";
} else if (!strcmp(hdr->oem_id, "SGI")) {
- return "sn2";
+ if (!strcmp(hdr->oem_table_id + 4, "UV"))
+ return "uv";
+ else
+ return "sn2";
}
return "dig";
@@ -130,6 +133,8 @@ acpi_get_sysname(void)
return "hpzx1_swiotlb";
# elif defined (CONFIG_IA64_SGI_SN2)
return "sn2";
+# elif defined (CONFIG_IA64_SGI_UV)
+ return "uv";
# elif defined (CONFIG_IA64_DIG)
return "dig";
# else
@@ -423,6 +428,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
@@ -459,7 +465,6 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
printk(KERN_ERR
"ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
len, slit->header.length);
- memset(numa_slit, 10, sizeof(numa_slit));
return;
}
slit_table = slit;
@@ -482,6 +487,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
+ cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
@@ -559,7 +565,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
- for (i = 0; i < srat_num_cpus; i++)
+ for_each_possible_early_cpu(i)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
@@ -567,8 +573,14 @@ void __init acpi_numa_arch_fixup(void)
printk(KERN_INFO "Number of memory chunks in system = %d\n",
num_node_memblks);
- if (!slit_table)
+ if (!slit_table) {
+ for (i = 0; i < MAX_NUMNODES; i++)
+ for (j = 0; j < MAX_NUMNODES; j++)
+ node_distance(i, j) = i == j ? LOCAL_DISTANCE :
+ REMOTE_DISTANCE;
return;
+ }
+
memset(numa_slit, -1, sizeof(numa_slit));
for (i = 0; i < slit_table->locality_count; i++) {
if (!pxm_bit_test(i))
@@ -620,6 +632,9 @@ void acpi_unregister_gsi(u32 gsi)
if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
return;
+ if (has_8259 && gsi < 16)
+ return;
+
iosapic_unregister_intr(gsi);
}
@@ -964,7 +979,7 @@ acpi_map_iosapics (void)
fs_initcall(acpi_map_iosapics);
#endif /* CONFIG_ACPI_NUMA */
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
{
int err;
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 0aebc6f79e95..c64a55af9b95 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -7,8 +7,9 @@
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
+#include <linux/pid.h>
#include <linux/clocksource.h>
-
+#include <linux/kbuild.h>
#include <asm-ia64/processor.h>
#include <asm-ia64/ptrace.h>
#include <asm-ia64/siginfo.h>
@@ -18,11 +19,6 @@
#include "../kernel/sigframe.h"
#include "../kernel/fsyscall_gtod_data.h"
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
void foo(void)
{
DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
@@ -34,17 +30,29 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+ BUILD_BUG_ON(sizeof(struct upid) != 32);
+ DEFINE(IA64_UPID_SHIFT, 5);
+
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+ DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+ DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+ DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+ DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+ DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index fbe742ad2fde..f065093f8e9b 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
+ crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
- if (!kdump_on_init)
+ if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
return NOTIFY_DONE;
switch (val) {
- case DIE_INIT_MONARCH_PROCESS:
+ case DIE_INIT_MONARCH_PROCESS:
+ if (kdump_on_init) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
- break;
- case DIE_INIT_MONARCH_LEAVE:
+ }
+ break;
+ case DIE_INIT_MONARCH_LEAVE:
+ if (kdump_on_init)
machine_kdump_on_init();
- break;
- case DIE_INIT_SLAVE_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_RENDZVOUS_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_MONARCH_LEAVE:
- /* die_register->signr indicate if MCA is recoverable */
- if (!args->signr)
- machine_kdump_on_init();
- break;
+ break;
+ case DIE_INIT_SLAVE_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_RENDZVOUS_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_MONARCH_LEAVE:
+ /* *(nd->data) indicate if MCA is recoverable */
+ if (kdump_on_fatal_mca && !(*(nd->data))) {
+ atomic_set(&kdump_in_progress, 1);
+ *(nd->monarch_cpu) = -1;
+ machine_kdump_on_init();
+ }
+ break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_on_init_table[] = {
+static ctl_table kdump_ctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "kdump_on_fatal_mca",
+ .data = &kdump_on_fatal_mca,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
- .child = kdump_on_init_table,
+ .child = kdump_ctl_table,
},
{ .ctl_name = 0 }
};
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 728d7247a1a6..d45f215bc8fc 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
+#include <asm/tlbflush.h>
#define EFI_DEBUG 0
@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
return NULL;
}
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+ u8 sum = 0;
+ u8 *end = buffer + length;
+
+ while (buffer < end)
+ sum = (u8) (sum + *(buffer++));
+
+ return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+ struct palo_table *palo = __va(palo_phys);
+ u8 checksum;
+
+ if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+ printk(KERN_INFO "PALO signature incorrect.\n");
+ return;
+ }
+
+ checksum = palo_checksum((u8 *)palo, palo->length);
+ if (checksum) {
+ printk(KERN_INFO "PALO checksum incorrect.\n");
+ return;
+ }
+
+ setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
void
efi_map_pal_code (void)
{
@@ -432,6 +468,7 @@ efi_init (void)
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
+ unsigned long palo_phys;
/*
* It's too early to be able to use the standard kernel command line
@@ -496,6 +533,8 @@ efi_init (void)
efi.hcdp = EFI_INVALID_TABLE_ADDR;
efi.uga = EFI_INVALID_TABLE_ADDR;
+ palo_phys = EFI_INVALID_TABLE_ADDR;
+
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = config_tables[i].table;
@@ -515,10 +554,17 @@ efi_init (void)
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid,
+ PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+ palo_phys = config_tables[i].table;
+ printk(" PALO=0x%lx", config_tables[i].table);
}
}
printk("\n");
+ if (palo_phys != EFI_INVALID_TABLE_ADDR)
+ handle_palo(palo_phys);
+
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c331c464b40..ca2bb95726de 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -570,6 +570,7 @@ GLOBAL_ENTRY(ia64_trace_syscall)
br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
.ret3:
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+(pUStk) rsm psr.i // disable interrupts
br.cond.sptk .work_pending_syscall_end
strace_error:
@@ -710,6 +711,16 @@ ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ adds r2=PT(LOADRS)+16,r12
+(pUStk) mov.m r22=ar.itc // fetch time at leave
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+(p6) ld4 r31=[r18] // load current_thread_info()->flags
+ ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
+ ;;
+#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +729,7 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
+#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +749,21 @@ ENTRY(ia64_leave_syscall)
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+ ;;
+ ld8 r30=[r2],16 // M0|1 load cr.ifs
+ ld8 r25=[r3],16 // M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ;;
+#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
+#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop 0
@@ -759,7 +780,11 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17 // M2|3
+#else
(pUStk) st1 [r14]=r17 // M2|3
+#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
@@ -775,12 +800,22 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov r19=ar.bsp // M2 get new backing store pointer
+ st8 [r14]=r22 // M save time at leave
+ mov f10=f0 // F clear f10
+
+ mov r22=r0 // A clear r22
+ movl r14=__kernel_syscall_via_epc // X
+ ;;
+#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
+#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +948,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ .pred.rel.mutex pUStk,pKStk
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+(pUStk) mov.m r22=ar.itc // M fetch time at leave
+ nop.i 0
+ ;;
+#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
+#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
@@ -938,15 +981,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
+#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
+ // mib : mov add br -> mib : ld8 add br
+ // bbb_ : br nop cover;; mbb_ : mov br cover;;
+ //
+ // no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk) st8 [r3]=r22 // save time at leave
+(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ ld8.fill r3=[r16] // deferred
+ LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk) br.cond.dpnt skip_rbs_switch
+ mov r16=ar.bsp // get existing backing store pointer
+#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
@@ -954,6 +1019,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
+#endif
/*
* Restore user backing store.
@@ -1090,6 +1156,9 @@ skip_rbs_switch:
* r31 = current->thread_info->flags
* On exit:
* p6 = TRUE if work-pending-check needs to be redone
+ *
+ * Interrupts are disabled on entry, reenabled depend on work, and
+ * disabled on exit.
*/
.work_pending_syscall:
add r2=-8,r2
@@ -1098,16 +1167,16 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
+ tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
;;
(pKStk) st4 [r20]=r21
- ssm psr.i // enable interrupts
#endif
+ ssm psr.i // enable interrupts
br.call.spnt.many rp=schedule
-.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
+.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check)
rsm psr.i // disable interrupts
;;
#ifdef CONFIG_PREEMPT
@@ -1116,13 +1185,13 @@ skip_rbs_switch:
(pKStk) st4 [r20]=r0 // preempt_count() <- 0
#endif
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
+ br.cond.sptk.many .work_processed_kernel
.notify:
(pUStk) br.call.spnt.many rp=notify_resume_user
-.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
+.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check)
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // don't re-check
+ br.cond.sptk.many .work_processed_kernel
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
@@ -1130,7 +1199,7 @@ skip_rbs_switch:
;;
ld8 r8=[r2]
ld8 r10=[r3]
- br.cond.sptk.many .work_processed_syscall // re-check
+ br.cond.sptk.many .work_processed_syscall
END(ia64_leave_kernel)
@@ -1168,9 +1237,12 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail)
END(ia64_invoke_schedule_tail)
/*
- * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
- * be set up by the caller. We declare 8 input registers so the system call
- * args get preserved, in case we need to restart a system call.
+ * Setup stack and call do_notify_resume_user(), keeping interrupts
+ * disabled.
+ *
+ * Note that pSys and pNonSys need to be set up by the caller.
+ * We declare 8 input registers so the system call args get preserved,
+ * in case we need to restart a system call.
*/
ENTRY(notify_resume_user)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 44841971f077..c1625c7e1779 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
+ add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+ ;;
+ ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
- add r8=IA64_TASK_TGID_OFFSET,r16
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
- ld4 r8=[r8] // r8 = current->tgid
+ ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
+ ;;
+ add r8=IA64_PID_LEVEL_OFFSET,r17
+ ;;
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
+ ;;
+ shl r8=r8,IA64_UPID_SHIFT
+ ;;
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
+ mov r17=0
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
+ ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
- add r8=IA64_TASK_PID_OFFSET,r16
+ add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
- ld4 r8=[r8]
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
+ ;;
+ shl r8=r8,IA64_UPID_SHIFT
+ ;;
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
cmp.ne p8,p0=0,r9
mov r17=-1
;;
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
- //
- // Additional note that code has changed a lot. Optimization is TBD.
- // Comments begin with "?" are maybe outdated.
- tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
- mov pr = r30,0xc000 // Set predicates according to function
+
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+ tnat.nz p6,p0 = r31 // guard against Nat argument
+(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
+ ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
- ld4 r2 = [r2] // process work pending flags
- ;;
-(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
- add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+ mov pr = r30,0xc000 // Set predicates according to function
+ ;;
and r2 = TIF_ALLWORK_MASK,r2
-(p6) br.cond.spnt.few .fail_einval // ? deferred branch
+ add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
- add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+ add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
-(p6) br.cond.spnt.many fsys_fallback_syscall
+(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
- ;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
- // ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
- ;; // ? overloaded 3 bundles!
+ ;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
- mov r8 = r0
(p14) getf.sig r2 = f8
;;
+ mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov.m r30=ar.itc // M get cycle for accounting
+#else
nop.m 0
+#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mov.m r30=ar.itc is called in advance
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r30,r19 // elapsed time in user mode
+ ;;
+ add r20=r20,r22 // sum stime
+ add r21=r21,r18 // sum utime
+ ;;
+ st8 [r16]=r20 // update stime
+ st8 [r17]=r21 // update utime
+ ;;
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index d3a41d5f8d12..ddeab4e36fd5 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
br.ret.sptk.many rp
END(sched_clock)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+ alloc r16=ar.pfs,1,0,0,0
+ addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+ ;;
+ ldf8 f8=[r8]
+ ;;
+ setf.sig f9=r32
+ ;;
+ xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
+ xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
+ ;;
+ getf.sig r8=f10 // (5 cyc)
+ getf.sig r9=f11
+ ;;
+ shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+ br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 8e7193d55528..6da1f20d7372 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index bc8efcad28b8..9d7e1c66faf4 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -18,7 +18,6 @@
#include <asm/pgtable.h>
static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 082c31dcfd99..39752cdef6ff 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
NR_PREALLOCATE_RTE_ENTRIES);
- if (!rte)
- return NULL;
for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
list_add(&rte->rte_list, &free_rte_list);
}
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 6dee579f205f..7fd18f54c056 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -183,10 +183,10 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
- extern void ia64_disable_timer(void);
extern volatile int time_keeper_id;
- ia64_disable_timer();
+ /* Mask ITV to disable timer */
+ ia64_set_itv(1 << 16);
/*
* Find a new timesync master
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index d8be23fbe6bc..5538471e8d68 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
static unsigned char count;
static long last_time;
- if (jiffies - last_time > 5*HZ)
+ if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (++count < 5) {
last_time = jiffies;
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8be00d..80b44ea052d7 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -805,8 +805,13 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ ;;
+ mov b6=r30 // I0 setup syscall handler branch reg early
+#else
nop.i 0
;;
+#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov.m r30=ar.itc // M get cycle for accounting
+#else
mov b6=r30 // I0 setup syscall handler branch reg early
+#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mov.m r30=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
+(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
+ ld8 r21=[r17] // M cumulated utime
+ sub r22=r19,r18 // A stime before leave
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
+ sub r18=r30,r19 // A elapsed time in user
+ ;;
+ add r20=r20,r22 // A sum stime
+ add r21=r21,r18 // A sum utime
+ ;;
+ st8 [r16]=r20 // M update stime
+ st8 [r17]=r21 // M update utime
+ ;;
+.skip_accounting:
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
bsw.1 // B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
+ * - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
@@ -1042,53 +1076,46 @@ END(ia64_syscall_setup)
DBG_FAULT(15)
FAULT(15)
+ .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ DBG_FAULT(16)
+ FAULT(16)
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
- * Squatting in this space ...
+ * There is no particular reason for this code to be here, other than
+ * that there happens to be space here that would go unused otherwise.
+ * If this fault ever gets "unreserved", simply moved the following
+ * code to a more suitable spot...
*
- * This special case dispatcher for illegal operation faults allows preserved
- * registers to be modified through a callback function (asm only) that is handed
- * back from the fault handler in r8. Up to three arguments can be passed to the
- * callback function by returning an aggregate with the callback as its first
- * element, followed by the arguments.
+ * account_sys_enter is called from SAVE_MIN* macros if accounting is
+ * enabled and if the macro is entered from user mode.
*/
-ENTRY(dispatch_illegal_op_fault)
- .prologue
- .body
- SAVE_MIN_WITH_COVER
- ssm psr.ic | PSR_DEFAULT_BITS
+ENTRY(account_sys_enter)
+ // mov.m r20=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
- srlz.i // guarantee that interruption collection is on
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel
+ ;;
+ ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
+ st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r20,r19 // elapsed time in user mode
;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
- mov out0=ar.ec
+ add r23=r23,r22 // sum stime
+ add r21=r21,r18 // sum utime
;;
- SAVE_REST
- PT_REGS_UNWIND_INFO(0)
+ st8 [r16]=r23 // update stime
+ st8 [r17]=r21 // update utime
;;
- br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0: ;;
- alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
- mov out0=r9
- mov out1=r10
- mov out2=r11
- movl r15=ia64_leave_kernel
- ;;
- mov rp=r15
- mov b6=r8
- ;;
- cmp.ne p6,p0=0,r8
-(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
- br.sptk.many ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
- .org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
- DBG_FAULT(16)
- FAULT(16)
+ br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1646,6 +1673,48 @@ END(ia32_interrupt)
DBG_FAULT(67)
FAULT(67)
+ /*
+ * Squatting in this space ...
+ *
+ * This special case dispatcher for illegal operation faults allows preserved
+ * registers to be modified through a callback function (asm only) that is handed
+ * back from the fault handler in r8. Up to three arguments can be passed to the
+ * callback function by returning an aggregate with the callback as its first
+ * element, followed by the arguments.
+ */
+ENTRY(dispatch_illegal_op_fault)
+ .prologue
+ .body
+ SAVE_MIN_WITH_COVER
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
+ mov out0=ar.ec
+ ;;
+ SAVE_REST
+ PT_REGS_UNWIND_INFO(0)
+ ;;
+ br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+ alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r9
+ mov out1=r10
+ mov out2=r11
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ mov b6=r8
+ ;;
+ cmp.ne p6,p0=0,r8
+(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
+ br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
#ifdef CONFIG_IA32_SUPPORT
/*
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 8d9a446a0d17..233434f4f88f 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+ s64 rel = ((s64) to - (s64) from) >> 4;
+ bundle_t *brl;
+ brl = (bundle_t *) ((u64) from & ~0xf);
+ brl->quad0.template = 0x05; /* [MLX](stop) */
+ brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
+ brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+ brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+ /* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+ brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
+/* Check the instruction in the slot is break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+ unsigned int major_opcode;
+ unsigned int template = bundle->quad0.template;
+ unsigned long kprobe_inst;
+
+ /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
+
+ /* Get Kprobe probe instruction at given slot*/
+ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+ /* For break instruction,
+ * Bits 37:40 Major opcode to be zero
+ * Bits 27:32 X6 to be zero
+ * Bits 32:35 X3 to be zero
+ */
+ if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+ /* Not a break instruction */
+ return 0;
+ }
+
+ /* Is a break instruction */
+ return 1;
+}
+
+/*
+ * In this function, we check whether the target bundle modifies IP or
+ * it triggers an exception. If so, it cannot be boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+ unsigned long bundle_addr)
+{
+ unsigned int template = bundle->quad0.template;
+
+ do {
+ if (search_exception_tables(bundle_addr + slot) ||
+ __is_ia64_break_inst(bundle, slot))
+ return 0; /* exception may occur in this bundle*/
+ } while ((++slot) < 3);
+ template &= 0x1e;
+ if (template >= 0x10 /* including B unit */ ||
+ template == 0x04 /* including X unit */ ||
+ template == 0x06) /* undefined */
+ return 0;
+
+ return 1;
+}
+
+/* Prepare long jump bundle and disables other boosters if need */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+ unsigned int slot = (unsigned long)p->addr & 0xf;
+ struct kprobe *other_kp;
+
+ if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+ set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+ p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+ }
+
+ /* disables boosters in previous slots */
+ for (; addr < (unsigned long)p->addr; addr++) {
+ other_kp = get_kprobe((void *)addr);
+ if (other_kp)
+ other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+ }
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
+ prepare_booster(p);
+
return 0;
}
@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ (unsigned long)p->ainsn.insn +
+ sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
+ free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
mutex_unlock(&kprobe_mutex);
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
- if (p->ainsn.inst_flag) {
+ if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
- unsigned int template, major_opcode;
- unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
- template = bundle.quad0.template;
-
- /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
- /* Get Kprobe probe instruction at given slot*/
- get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
- /* For break instruction,
- * Bits 37:40 Major opcode to be zero
- * Bits 27:32 X6 to be zero
- * Bits 32:35 X3 to be zero
- */
- if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
- /* Not a break instruction */
- return 0;
- }
-
- /* Is a break instruction */
- return 1;
+ return __is_ia64_break_inst(&bundle, slot);
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+ if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+ /* Boost up -- we can execute copied instructions directly */
+ ia64_psr(regs)->ri = p->ainsn.slot;
+ regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+ /* turn single stepping off */
+ ia64_psr(regs)->ss = 0;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6c18221dba36..705176b434b3 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -69,6 +69,7 @@
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
*/
+#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -97,6 +98,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include <asm/tlb.h>
#include "mca_drv.h"
#include "entry.h"
@@ -107,11 +109,26 @@
# define IA64_MCA_DEBUG(fmt...)
#endif
+#define NOTIFY_INIT(event, regs, arg, spin) \
+do { \
+ if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \
+ == NOTIFY_STOP) && ((spin) == 1)) \
+ ia64_mca_spin(__func__); \
+} while (0)
+
+#define NOTIFY_MCA(event, regs, arg, spin) \
+do { \
+ if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \
+ == NOTIFY_STOP) && ((spin) == 1)) \
+ ia64_mca_spin(__func__); \
+} while (0)
+
/* Used by mca_asm.S */
DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
@@ -293,7 +310,8 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
- if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+ if (mlogbuf_timestamp &&
+ time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
@@ -762,9 +780,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
/* Mask all interrupts */
local_irq_save(flags);
- if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
/* Register with the SAL monarch that the slave has
@@ -772,17 +789,13 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
*/
ia64_sal_mc_rendez();
- if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
/* Wait for the monarch cpu to exit. */
while (monarch_cpu != -1)
cpu_relax(); /* spin until monarch leaves */
- if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
/* Enable all interrupts */
@@ -1182,6 +1195,49 @@ all_in:
return;
}
+/* mca_insert_tr
+ *
+ * Switch rid when TR reload and needed!
+ * iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+ int i;
+ u64 old_rr;
+ struct ia64_tr_entry *p;
+ unsigned long psr;
+ int cpu = smp_processor_id();
+
+ psr = ia64_clear_ic();
+ for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+ p = &__per_cpu_idtrs[cpu][iord-1][i];
+ if (p->pte & 0x1) {
+ old_rr = ia64_get_rr(p->ifa);
+ if (old_rr != p->rr) {
+ ia64_set_rr(p->ifa, p->rr);
+ ia64_srlz_d();
+ }
+ ia64_ptr(iord, p->ifa, p->itir >> 2);
+ ia64_srlz_i();
+ if (iord & 0x1) {
+ ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+ ia64_srlz_i();
+ }
+ if (iord & 0x2) {
+ ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+ ia64_srlz_i();
+ }
+ if (old_rr != p->rr) {
+ ia64_set_rr(p->ifa, old_rr);
+ ia64_srlz_d();
+ }
+ }
+ }
+ ia64_set_psr(psr);
+}
+
/*
* ia64_mca_handler
*
@@ -1209,7 +1265,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
int recover, cpu = smp_processor_id();
struct task_struct *previous_current;
struct ia64_mca_notify_die nd =
- { .sos = sos, .monarch_cpu = &monarch_cpu };
+ { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
static atomic_t mca_count;
static cpumask_t mca_cpu;
@@ -1225,9 +1281,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
- if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
if (sos->monarch) {
@@ -1241,13 +1295,12 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
* does not work.
*/
ia64_mca_wakeup_all();
- if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
} else {
while (cpu_isset(cpu, mca_cpu))
cpu_relax(); /* spin until monarch wakes us */
- }
+ }
+
+ NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
/* Get the MCA error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -1266,15 +1319,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
-#ifdef CONFIG_KEXEC
- atomic_set(&kdump_in_progress, 1);
- monarch_cpu = -1;
-#endif
}
- if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ if (__get_cpu_var(ia64_mca_tr_reload)) {
+ mca_insert_tr(0x1); /*Reload dynamic itrs*/
+ mca_insert_tr(0x2); /*Reload dynamic itrs*/
+ }
+
+ NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
if (atomic_dec_return(&mca_count) > 0) {
int i;
@@ -1595,7 +1647,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
struct ia64_mca_notify_die nd =
{ .sos = sos, .monarch_cpu = &monarch_cpu };
- (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
+ NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
sos->proc_state_param, cpu, sos->monarch);
@@ -1632,17 +1684,15 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
while (monarch_cpu == -1)
cpu_relax(); /* spin until monarch enters */
- if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
- if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+
+ NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+ NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
while (monarch_cpu != -1)
cpu_relax(); /* spin until monarch leaves */
- if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+
+ NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
mprintk("Slave on cpu %d returning to normal service.\n", cpu);
set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -1651,9 +1701,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
}
monarch_cpu = cpu;
- if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
/*
* Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1668,12 +1716,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
* to default_monarch_init_process() above and just print all the
* tasks.
*/
- if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
- if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__func__);
+ NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+ NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
atomic_dec(&monarchs);
set_curr_task(cpu, previous_current);
@@ -1905,7 +1950,7 @@ ia64_mca_init(void)
printk(KERN_INFO "Increasing MCA rendezvous timeout from "
"%ld to %ld milliseconds\n", timeout, isrv.v0);
timeout = isrv.v0;
- (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0);
+ NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
continue;
}
printk(KERN_ERR "Failed to register rendezvous interrupt "
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 8bc7d259e0c6..a06d46548ff9 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -219,8 +219,13 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
+ GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+ mov r18 = 1
;;
srlz.d
+ ;;
+ st8 [r2] =r18
+ ;;
done_tlb_purge_and_reload:
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8bada786..74b6d670aaef 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -3,6 +3,21 @@
#include "entry.h"
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP \
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER \
+(pUStk) br.call.spnt rp=account_sys_enter \
+ ;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
+.section ".data.patch.rse", "a"
+.previous
+
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
@@ -28,7 +43,7 @@
* Note that psr.ic is NOT turned on by this macro. This is so that
* we can pass interruption state as arguments to a handler.
*/
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND) \
mov r16=IA64_KR(CURRENT); /* M */ \
mov r27=ar.rsc; /* M */ \
mov r20=r1; /* A */ \
@@ -75,6 +90,7 @@
tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
mov r29=b0 \
;; \
+ WORKAROUND; \
adds r16=PT(R8),r1; /* initialize first base pointer */ \
adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
@@ -122,11 +138,13 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
+ ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
+ ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
@@ -192,6 +210,40 @@
st8 [r25]=r10; /* ar.ssd */ \
;;
-#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )
+#define RSE_WORKAROUND \
+(pUStk) extr.u r17=r18,3,6; \
+(pUStk) sub r16=r18,r22; \
+[1:](pKStk) br.cond.sptk.many 1f; \
+ .xdata4 ".data.patch.rse",1b-. \
+ ;; \
+ cmp.ge p6,p7 = 33,r17; \
+ ;; \
+(p6) mov r17=0x310; \
+(p7) mov r17=0x308; \
+ ;; \
+ cmp.leu p1,p0=r16,r17; \
+(p1) br.cond.sptk.many 1f; \
+ dep.z r17=r26,0,62; \
+ movl r16=2f; \
+ ;; \
+ mov ar.pfs=r17; \
+ dep r27=r0,r27,16,14; \
+ mov b0=r16; \
+ ;; \
+ br.ret.sptk b0; \
+ ;; \
+2: \
+ mov ar.rsc=r0 \
+ ;; \
+ flushrs; \
+ ;; \
+ mov ar.bspstore=r22 \
+ ;; \
+ mov r18=ar.bsp; \
+ ;; \
+1: \
+ .pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , )
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f5fe2f..c93420c97409 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
- for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_possible_early_cpu(cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 396004e8cd14..9dc00f7fe10e 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -900,12 +900,6 @@ static void
palinfo_smp_call(void *info)
{
palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
- if (data == NULL) {
- printk(KERN_ERR "palinfo: data pointer is NULL\n");
- data->ret = 0; /* no output */
- return;
- }
- /* does this actual call */
data->ret = (*data->func)(data->page);
}
@@ -1053,7 +1047,7 @@ static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block palinfo_cpu_notifier __cpuinitdata =
+static struct notifier_block __refdata palinfo_cpu_notifier =
{
.notifier_call = palinfo_cpu_callback,
.priority = 0,
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 2cb9425e0421..b83b2c516008 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -115,6 +115,29 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
ia64_srlz_i();
}
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+ s32 *offp = (s32 *) start;
+ u64 ip, *b;
+
+ while (offp < (s32 *) end) {
+ ip = (u64) offp + *offp;
+
+ b = (u64 *)(ip & -16);
+ b[1] &= ~0xf800000L;
+ ia64_fc((void *) ip);
+ ++offp;
+ }
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
@@ -135,10 +158,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
- wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
- wp[1] = 0x0004000000000200UL;
- wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
- wp[3] = 0x0084006880000200UL;
+ wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+ wp[1] = 0x0084006880000200UL;
+ wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+ wp[3] = 0x0004000000000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index a2aabfdc80d9..7714a97b0104 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -867,7 +867,7 @@ pfm_rvfree(void *mem, unsigned long size)
}
static pfm_context_t *
-pfm_context_alloc(void)
+pfm_context_alloc(int ctx_flags)
{
pfm_context_t *ctx;
@@ -878,6 +878,46 @@ pfm_context_alloc(void)
ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
if (ctx) {
DPRINT(("alloc ctx @%p\n", ctx));
+
+ /*
+ * init context protection lock
+ */
+ spin_lock_init(&ctx->ctx_lock);
+
+ /*
+ * context is unloaded
+ */
+ ctx->ctx_state = PFM_CTX_UNLOADED;
+
+ /*
+ * initialization of context's flags
+ */
+ ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+ ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+ ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
+ /*
+ * will move to set properties
+ * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+ */
+
+ /*
+ * init restart semaphore to locked
+ */
+ init_completion(&ctx->ctx_restart_done);
+
+ /*
+ * activation is used in SMP only
+ */
+ ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+ SET_LAST_CPU(ctx, -1);
+
+ /*
+ * initialize notification message queue
+ */
+ ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+ init_waitqueue_head(&ctx->ctx_msgq_wait);
+ init_waitqueue_head(&ctx->ctx_zombieq);
+
}
return ctx;
}
@@ -1824,11 +1864,6 @@ pfm_flush(struct file *filp, fl_owner_t id)
* invoked after, it will find an empty queue and no
* signal will be sent. In both case, we are safe
*/
- if (filp->f_flags & FASYNC) {
- DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
- pfm_do_fasync (-1, filp, ctx, 0);
- }
-
PROTECT_CTX(ctx, flags);
state = ctx->ctx_state;
@@ -1959,6 +1994,11 @@ pfm_close(struct inode *inode, struct file *filp)
return -EBADF;
}
+ if (filp->f_flags & FASYNC) {
+ DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
+ pfm_do_fasync(-1, filp, ctx, 0);
+ }
+
PROTECT_CTX(ctx, flags);
state = ctx->ctx_state;
@@ -2165,28 +2205,21 @@ static struct dentry_operations pfmfs_dentry_operations = {
};
-static int
-pfm_alloc_fd(struct file **cfile)
+static struct file *
+pfm_alloc_file(pfm_context_t *ctx)
{
- int fd, ret = 0;
- struct file *file = NULL;
- struct inode * inode;
+ struct file *file;
+ struct inode *inode;
+ struct dentry *dentry;
char name[32];
struct qstr this;
- fd = get_unused_fd();
- if (fd < 0) return -ENFILE;
-
- ret = -ENFILE;
-
- file = get_empty_filp();
- if (!file) goto out;
-
/*
* allocate a new inode
*/
inode = new_inode(pfmfs_mnt->mnt_sb);
- if (!inode) goto out;
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
@@ -2199,59 +2232,28 @@ pfm_alloc_fd(struct file **cfile)
this.len = strlen(name);
this.hash = inode->i_ino;
- ret = -ENOMEM;
-
/*
* allocate a new dcache entry
*/
- file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
- if (!file->f_path.dentry) goto out;
+ dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+ if (!dentry) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
- file->f_path.dentry->d_op = &pfmfs_dentry_operations;
+ dentry->d_op = &pfmfs_dentry_operations;
+ d_add(dentry, inode);
- d_add(file->f_path.dentry, inode);
- file->f_path.mnt = mntget(pfmfs_mnt);
- file->f_mapping = inode->i_mapping;
+ file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops);
+ if (!file) {
+ dput(dentry);
+ return ERR_PTR(-ENFILE);
+ }
- file->f_op = &pfm_file_ops;
- file->f_mode = FMODE_READ;
file->f_flags = O_RDONLY;
- file->f_pos = 0;
-
- /*
- * may have to delay until context is attached?
- */
- fd_install(fd, file);
-
- /*
- * the file structure we will use
- */
- *cfile = file;
-
- return fd;
-out:
- if (file) put_filp(file);
- put_unused_fd(fd);
- return ret;
-}
+ file->private_data = ctx;
-static void
-pfm_free_fd(int fd, struct file *file)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
-
- /*
- * there ie no fd_uninstall(), so we do it here
- */
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- rcu_assign_pointer(fdt->fd[fd], NULL);
- spin_unlock(&files->file_lock);
-
- if (file)
- put_filp(file);
- put_unused_fd(fd);
+ return file;
}
static int
@@ -2475,6 +2477,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
/* link buffer format and context */
ctx->ctx_buf_fmt = fmt;
+ ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */
/*
* check if buffer format wants to use perfmon buffer allocation/mapping service
@@ -2669,78 +2672,45 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
{
pfarg_context_t *req = (pfarg_context_t *)arg;
struct file *filp;
+ struct path path;
int ctx_flags;
+ int fd;
int ret;
/* let's check the arguments first */
ret = pfarg_is_sane(current, req);
- if (ret < 0) return ret;
+ if (ret < 0)
+ return ret;
ctx_flags = req->ctx_flags;
ret = -ENOMEM;
- ctx = pfm_context_alloc();
- if (!ctx) goto error;
+ fd = get_unused_fd();
+ if (fd < 0)
+ return fd;
- ret = pfm_alloc_fd(&filp);
- if (ret < 0) goto error_file;
+ ctx = pfm_context_alloc(ctx_flags);
+ if (!ctx)
+ goto error;
- req->ctx_fd = ctx->ctx_fd = ret;
+ filp = pfm_alloc_file(ctx);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto error_file;
+ }
- /*
- * attach context to file
- */
- filp->private_data = ctx;
+ req->ctx_fd = ctx->ctx_fd = fd;
/*
* does the user want to sample?
*/
if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
- if (ret) goto buffer_error;
+ if (ret)
+ goto buffer_error;
}
- /*
- * init context protection lock
- */
- spin_lock_init(&ctx->ctx_lock);
-
- /*
- * context is unloaded
- */
- ctx->ctx_state = PFM_CTX_UNLOADED;
-
- /*
- * initialization of context's flags
- */
- ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
- ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
- ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
- ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
- /*
- * will move to set properties
- * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
- */
-
- /*
- * init restart semaphore to locked
- */
- init_completion(&ctx->ctx_restart_done);
-
- /*
- * activation is used in SMP only
- */
- ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
- SET_LAST_CPU(ctx, -1);
-
- /*
- * initialize notification message queue
- */
- ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
- init_waitqueue_head(&ctx->ctx_msgq_wait);
- init_waitqueue_head(&ctx->ctx_zombieq);
-
DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
ctx,
ctx_flags,
@@ -2755,10 +2725,14 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
*/
pfm_reset_pmu_state(ctx);
+ fd_install(fd, filp);
+
return 0;
buffer_error:
- pfm_free_fd(ctx->ctx_fd, filp);
+ path = filp->f_path;
+ put_filp(filp);
+ path_put(&path);
if (ctx->ctx_buf_fmt) {
pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
@@ -2767,6 +2741,7 @@ error_file:
pfm_context_free(ctx);
error:
+ put_unused_fd(fd);
return ret;
}
@@ -4204,10 +4179,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
- break;
+ goto out;
}
} while_each_thread (g, t);
-
+out:
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
@@ -5038,12 +5013,13 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
}
static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
+
/*
* pfm_handle_work() can be called with interrupts enabled
* (TIF_NEED_RESCHED) or disabled. The down_interruptible
* call may sleep, therefore we must re-enable interrupts
* to avoid deadlocks. It is safe to do so because this function
- * is called ONLY when returning to user level (PUStk=1), in which case
+ * is called ONLY when returning to user level (pUStk=1), in which case
* there is no risk of kernel stack overflow due to deep
* interrupt nesting.
*/
@@ -5059,7 +5035,8 @@ pfm_handle_work(void)
ctx = PFM_GET_CTX(current);
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current));
+ printk(KERN_ERR "perfmon: [%d] has no PFM context\n",
+ task_pid_nr(current));
return;
}
@@ -5083,11 +5060,12 @@ pfm_handle_work(void)
/*
* must be done before we check for simple-reset mode
*/
- if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
-
+ if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE)
+ goto do_zombie;
//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
- if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
+ if (reason == PFM_TRAP_REASON_RESET)
+ goto skip_blocking;
/*
* restore interrupt mask to what it was on entry.
@@ -5135,7 +5113,8 @@ do_zombie:
/*
* in case of interruption of down() we don't restart anything
*/
- if (ret < 0) goto nothing_to_do;
+ if (ret < 0)
+ goto nothing_to_do;
skip_blocking:
pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
@@ -5511,7 +5490,7 @@ stop_monitoring:
}
static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
{
struct task_struct *task;
pfm_context_t *ctx;
@@ -5591,7 +5570,7 @@ pfm_interrupt_handler(int irq, void *arg)
start_cycles = ia64_get_itc();
- ret = pfm_do_interrupt_handler(irq, arg, regs);
+ ret = pfm_do_interrupt_handler(arg, regs);
total_cycles = ia64_get_itc();
@@ -6695,16 +6674,12 @@ pfm_init(void)
/*
* create /proc/perfmon (mostly for debugging purposes)
*/
- perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+ perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
if (perfmon_dir == NULL) {
printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
pmu_conf = NULL;
return -1;
}
- /*
- * install customized file operations for /proc/perfmon entry
- */
- perfmon_dir->proc_fops = &pfm_proc_fops;
/*
* create /proc/sys/kernel/perfmon (for debugging purposes)
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 49937a383b23..a3a34b4eb038 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -167,11 +167,18 @@ void tsk_clear_notify_resume(struct task_struct *tsk)
clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
}
+/*
+ * do_notify_resume_user():
+ * Called from notify_resume_user at entry.S, with interrupts disabled.
+ */
void
-do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
+do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
{
if (fsys_mode(current, &scr->pt)) {
- /* defer signal-handling etc. until we return to privilege-level 0. */
+ /*
+ * defer signal-handling etc. until we return to
+ * privilege-level 0.
+ */
if (!ia64_psr(&scr->pt)->lp)
ia64_psr(&scr->pt)->lp = 1;
return;
@@ -179,16 +186,26 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall
#ifdef CONFIG_PERFMON
if (current->thread.pfm_needs_checking)
+ /*
+ * Note: pfm_handle_work() allow us to call it with interrupts
+ * disabled, and may enable interrupts within the function.
+ */
pfm_handle_work();
#endif
/* deal with pending signal delivery */
- if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
+ if (test_thread_flag(TIF_SIGPENDING)) {
+ local_irq_enable(); /* force interrupt enable */
ia64_do_signal(scr, in_syscall);
+ }
/* copy user rbs to kernel rbs */
- if (unlikely(test_thread_flag(TIF_RESTORE_RSE)))
+ if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
+ local_irq_enable(); /* force interrupt enable */
ia64_sync_krbs();
+ }
+
+ local_irq_disable(); /* force interrupt disable */
}
static int pal_halt = 1;
@@ -625,21 +642,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_copy_regs, regs);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_copy_task_regs(task, &tcore_info, regs);
- }
- return 1;
-}
-
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
@@ -647,21 +649,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
}
int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_dump_fpu, dst);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_dump_task_fpu(task, &tcore_info, dst);
- }
- return 1;
-}
-
-int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
unw_init_running(do_dump_fpu, dst);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index ab784ec4319d..2a9943b5947f 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,6 +3,9 @@
*
* Copyright (C) 1999-2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ * 2006-08-12 - IA64 Native Utrace implementation support added by
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
* Derived from the x86 and Alpha versions.
*/
@@ -17,6 +20,8 @@
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
psr->dfh = 1;
}
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
- unsigned long *data, int write_access)
-{
- struct ia64_fpreg fpval;
- int ret;
-
- ret = unw_get_fr(info, regnum, &fpval);
- if (ret < 0)
- return ret;
-
- if (write_access) {
- fpval.u.bits[hi] = *data;
- ret = unw_set_fr(info, regnum, fpval);
- } else
- *data = fpval.u.bits[hi];
- return ret;
-}
-
/*
* Change the machine-state of CHILD such that it will return via the normal
* kernel exit-path, rather than the syscall-exit path.
@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
static int
access_uarea (struct task_struct *child, unsigned long addr,
- unsigned long *data, int write_access)
-{
- unsigned long *ptr, regnum, urbs_end, cfm;
- struct switch_stack *sw;
- struct pt_regs *pt;
-# define pt_reg_addr(pt, reg) ((void *) \
- ((unsigned long) (pt) \
- + offsetof(struct pt_regs, reg)))
-
-
- pt = task_pt_regs(child);
- sw = (struct switch_stack *) (child->thread.ksp + 16);
-
- if ((addr & 0x7) != 0) {
- dprintk("ptrace: unaligned register address 0x%lx\n", addr);
- return -1;
- }
-
- if (addr < PT_F127 + 16) {
- /* accessing fph */
- if (write_access)
- ia64_sync_fph(child);
- else
- ia64_flush_fph(child);
- ptr = (unsigned long *)
- ((unsigned long) &child->thread.fph + addr);
- } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
- /* scratch registers untouched by kernel (saved in pt_regs) */
- ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
- } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
- /*
- * Scratch registers untouched by kernel (saved in
- * switch_stack).
- */
- ptr = (unsigned long *) ((long) sw
- + (addr - PT_NAT_BITS - 32));
- } else if (addr < PT_AR_LC + 8) {
- /* preserved state: */
- struct unw_frame_info info;
- char nat = 0;
- int ret;
-
- unw_init_from_blocked_task(&info, child);
- if (unw_unwind_to_user(&info) < 0)
- return -1;
-
- switch (addr) {
- case PT_NAT_BITS:
- return access_nat_bits(child, pt, &info,
- data, write_access);
-
- case PT_R4: case PT_R5: case PT_R6: case PT_R7:
- if (write_access) {
- /* read NaT bit first: */
- unsigned long dummy;
-
- ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
- &dummy, &nat);
- if (ret < 0)
- return ret;
- }
- return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
- &nat, write_access);
-
- case PT_B1: case PT_B2: case PT_B3:
- case PT_B4: case PT_B5:
- return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
- write_access);
-
- case PT_AR_EC:
- return unw_access_ar(&info, UNW_AR_EC, data,
- write_access);
-
- case PT_AR_LC:
- return unw_access_ar(&info, UNW_AR_LC, data,
- write_access);
-
- default:
- if (addr >= PT_F2 && addr < PT_F5 + 16)
- return access_fr(&info, (addr - PT_F2)/16 + 2,
- (addr & 8) != 0, data,
- write_access);
- else if (addr >= PT_F16 && addr < PT_F31 + 16)
- return access_fr(&info,
- (addr - PT_F16)/16 + 16,
- (addr & 8) != 0,
- data, write_access);
- else {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- }
- } else if (addr < PT_F9+16) {
- /* scratch state */
- switch (addr) {
- case PT_AR_BSP:
- /*
- * By convention, we use PT_AR_BSP to refer to
- * the end of the user-level backing store.
- * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
- * to get the real value of ar.bsp at the time
- * the kernel was entered.
- *
- * Furthermore, when changing the contents of
- * PT_AR_BSP (or PT_CFM) while the task is
- * blocked in a system call, convert the state
- * so that the non-system-call exit
- * path is used. This ensures that the proper
- * state will be picked up when resuming
- * execution. However, it *also* means that
- * once we write PT_AR_BSP/PT_CFM, it won't be
- * possible to modify the syscall arguments of
- * the pending system call any longer. This
- * shouldn't be an issue because modifying
- * PT_AR_BSP/PT_CFM generally implies that
- * we're either abandoning the pending system
- * call or that we defer it's re-execution
- * (e.g., due to GDB doing an inferior
- * function call).
- */
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (*data != urbs_end) {
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- /*
- * Simulate user-level write
- * of ar.bsp:
- */
- pt->loadrs = 0;
- pt->ar_bspstore = *data;
- }
- } else
- *data = urbs_end;
- return 0;
-
- case PT_CFM:
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (((cfm ^ *data) & PFM_MASK) != 0) {
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
- | (*data & PFM_MASK));
- }
- } else
- *data = cfm;
- return 0;
-
- case PT_CR_IPSR:
- if (write_access) {
- unsigned long tmp = *data;
- /* psr.ri==3 is a reserved value: SDM 2:25 */
- if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
- tmp &= ~IA64_PSR_RI;
- pt->cr_ipsr = ((tmp & IPSR_MASK)
- | (pt->cr_ipsr & ~IPSR_MASK));
- } else
- *data = (pt->cr_ipsr & IPSR_MASK);
- return 0;
-
- case PT_AR_RSC:
- if (write_access)
- pt->ar_rsc = *data | (3 << 2); /* force PL3 */
- else
- *data = pt->ar_rsc;
- return 0;
-
- case PT_AR_RNAT:
- ptr = pt_reg_addr(pt, ar_rnat);
- break;
- case PT_R1:
- ptr = pt_reg_addr(pt, r1);
- break;
- case PT_R2: case PT_R3:
- ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
- break;
- case PT_R8: case PT_R9: case PT_R10: case PT_R11:
- ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
- break;
- case PT_R12: case PT_R13:
- ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
- break;
- case PT_R14:
- ptr = pt_reg_addr(pt, r14);
- break;
- case PT_R15:
- ptr = pt_reg_addr(pt, r15);
- break;
- case PT_R16: case PT_R17: case PT_R18: case PT_R19:
- case PT_R20: case PT_R21: case PT_R22: case PT_R23:
- case PT_R24: case PT_R25: case PT_R26: case PT_R27:
- case PT_R28: case PT_R29: case PT_R30: case PT_R31:
- ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
- break;
- case PT_B0:
- ptr = pt_reg_addr(pt, b0);
- break;
- case PT_B6:
- ptr = pt_reg_addr(pt, b6);
- break;
- case PT_B7:
- ptr = pt_reg_addr(pt, b7);
- break;
- case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
- case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
- ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
- break;
- case PT_AR_BSPSTORE:
- ptr = pt_reg_addr(pt, ar_bspstore);
- break;
- case PT_AR_UNAT:
- ptr = pt_reg_addr(pt, ar_unat);
- break;
- case PT_AR_PFS:
- ptr = pt_reg_addr(pt, ar_pfs);
- break;
- case PT_AR_CCV:
- ptr = pt_reg_addr(pt, ar_ccv);
- break;
- case PT_AR_FPSR:
- ptr = pt_reg_addr(pt, ar_fpsr);
- break;
- case PT_CR_IIP:
- ptr = pt_reg_addr(pt, cr_iip);
- break;
- case PT_PR:
- ptr = pt_reg_addr(pt, pr);
- break;
- /* scratch register */
-
- default:
- /* disallow accessing anything else... */
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- } else if (addr <= PT_AR_SSD) {
- ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
- } else {
- /* access debug registers */
-
- if (addr >= PT_IBR) {
- regnum = (addr - PT_IBR) >> 3;
- ptr = &child->thread.ibr[0];
- } else {
- regnum = (addr - PT_DBR) >> 3;
- ptr = &child->thread.dbr[0];
- }
-
- if (regnum >= 8) {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
-#ifdef CONFIG_PERFMON
- /*
- * Check if debug registers are used by perfmon. This
- * test must be done once we know that we can do the
- * operation, i.e. the arguments are all valid, but
- * before we start modifying the state.
- *
- * Perfmon needs to keep a count of how many processes
- * are trying to modify the debug registers for system
- * wide monitoring sessions.
- *
- * We also include read access here, because they may
- * cause the PMU-installed debug register state
- * (dbr[], ibr[]) to be reset. The two arrays are also
- * used by perfmon, but we do not use
- * IA64_THREAD_DBG_VALID. The registers are restored
- * by the PMU context switch code.
- */
- if (pfm_use_debug_registers(child)) return -1;
-#endif
-
- if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
- child->thread.flags |= IA64_THREAD_DBG_VALID;
- memset(child->thread.dbr, 0,
- sizeof(child->thread.dbr));
- memset(child->thread.ibr, 0,
- sizeof(child->thread.ibr));
- }
-
- ptr += regnum;
-
- if ((regnum & 1) && write_access) {
- /* don't let the user set kernel-level breakpoints: */
- *ptr = *data & ~(7UL << 56);
- return 0;
- }
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
+ unsigned long *data, int write_access);
static long
ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
}
+
+/* Utrace implementation starts here */
+struct regset_get {
+ void *kbuf;
+ void __user *ubuf;
+};
+
+struct regset_set {
+ const void *kbuf;
+ const void __user *ubuf;
+};
+
+struct regset_getset {
+ struct task_struct *target;
+ const struct user_regset *regset;
+ union {
+ struct regset_get get;
+ struct regset_set set;
+ } u;
+ unsigned int pos;
+ unsigned int count;
+ int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+ int ret;
+ char nat = 0;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_GR_OFFSET(1):
+ ptr = &pt->r1;
+ break;
+ case ELF_GR_OFFSET(2):
+ case ELF_GR_OFFSET(3):
+ ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+ break;
+ case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+ if (write_access) {
+ /* read NaT bit first: */
+ unsigned long dummy;
+
+ ret = unw_get_gr(info, addr/8, &dummy, &nat);
+ if (ret < 0)
+ return ret;
+ }
+ return unw_access_gr(info, addr/8, data, &nat, write_access);
+ case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+ ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+ break;
+ case ELF_GR_OFFSET(12):
+ case ELF_GR_OFFSET(13):
+ ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+ break;
+ case ELF_GR_OFFSET(14):
+ ptr = &pt->r14;
+ break;
+ case ELF_GR_OFFSET(15):
+ ptr = &pt->r15;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_BR_OFFSET(0):
+ ptr = &pt->b0;
+ break;
+ case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+ return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+ data, write_access);
+ case ELF_BR_OFFSET(6):
+ ptr = &pt->b6;
+ break;
+ case ELF_BR_OFFSET(7):
+ ptr = &pt->b7;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long cfm, urbs_end;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+ switch (addr) {
+ case ELF_AR_RSC_OFFSET:
+ /* force PL3 */
+ if (write_access)
+ pt->ar_rsc = *data | (3 << 2);
+ else
+ *data = pt->ar_rsc;
+ return 0;
+ case ELF_AR_BSP_OFFSET:
+ /*
+ * By convention, we use PT_AR_BSP to refer to
+ * the end of the user-level backing store.
+ * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+ * to get the real value of ar.bsp at the time
+ * the kernel was entered.
+ *
+ * Furthermore, when changing the contents of
+ * PT_AR_BSP (or PT_CFM) while the task is
+ * blocked in a system call, convert the state
+ * so that the non-system-call exit
+ * path is used. This ensures that the proper
+ * state will be picked up when resuming
+ * execution. However, it *also* means that
+ * once we write PT_AR_BSP/PT_CFM, it won't be
+ * possible to modify the syscall arguments of
+ * the pending system call any longer. This
+ * shouldn't be an issue because modifying
+ * PT_AR_BSP/PT_CFM generally implies that
+ * we're either abandoning the pending system
+ * call or that we defer it's re-execution
+ * (e.g., due to GDB doing an inferior
+ * function call).
+ */
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (*data != urbs_end) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ /*
+ * Simulate user-level write
+ * of ar.bsp:
+ */
+ pt->loadrs = 0;
+ pt->ar_bspstore = *data;
+ }
+ } else
+ *data = urbs_end;
+ return 0;
+ case ELF_AR_BSPSTORE_OFFSET:
+ ptr = &pt->ar_bspstore;
+ break;
+ case ELF_AR_RNAT_OFFSET:
+ ptr = &pt->ar_rnat;
+ break;
+ case ELF_AR_CCV_OFFSET:
+ ptr = &pt->ar_ccv;
+ break;
+ case ELF_AR_UNAT_OFFSET:
+ ptr = &pt->ar_unat;
+ break;
+ case ELF_AR_FPSR_OFFSET:
+ ptr = &pt->ar_fpsr;
+ break;
+ case ELF_AR_PFS_OFFSET:
+ ptr = &pt->ar_pfs;
+ break;
+ case ELF_AR_LC_OFFSET:
+ return unw_access_ar(info, UNW_AR_LC, data,
+ write_access);
+ case ELF_AR_EC_OFFSET:
+ return unw_access_ar(info, UNW_AR_EC, data,
+ write_access);
+ case ELF_AR_CSD_OFFSET:
+ ptr = &pt->ar_csd;
+ break;
+ case ELF_AR_SSD_OFFSET:
+ ptr = &pt->ar_ssd;
+ }
+ } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+ switch (addr) {
+ case ELF_CR_IIP_OFFSET:
+ ptr = &pt->cr_iip;
+ break;
+ case ELF_CFM_OFFSET:
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (((cfm ^ *data) & PFM_MASK) != 0) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+ | (*data & PFM_MASK));
+ }
+ } else
+ *data = cfm;
+ return 0;
+ case ELF_CR_IPSR_OFFSET:
+ if (write_access) {
+ unsigned long tmp = *data;
+ /* psr.ri==3 is a reserved value: SDM 2:25 */
+ if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+ tmp &= ~IA64_PSR_RI;
+ pt->cr_ipsr = ((tmp & IPSR_MASK)
+ | (pt->cr_ipsr & ~IPSR_MASK));
+ } else
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+ }
+ } else if (addr == ELF_NAT_OFFSET)
+ return access_nat_bits(target, pt, info,
+ data, write_access);
+ else if (addr == ELF_PR_OFFSET)
+ ptr = &pt->pr;
+ else
+ return -1;
+
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+
+ return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+ return access_elf_gpreg(target, info, addr, data, write_access);
+ else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+ return access_elf_breg(target, info, addr, data, write_access);
+ else
+ return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index, min_copy;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /*
+ * coredump format:
+ * r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+ &dst->u.get.kbuf,
+ &dst->u.get.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* gr1 - gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* r16-r31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+ }
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* gr1-gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* gr16-gr31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ i = dst->pos;
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret)
+ return;
+ for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+}
+
+#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ elf_fpreg_t tmp[30];
+ int index, min_copy, i;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip pos 0 and 1 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+ dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+ &dst->u.get.kbuf,
+ &dst->u.get.ubuf,
+ 0, ELF_FP_OFFSET(2));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fr2-fr31 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+ index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+ min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+ dst->pos + dst->count);
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+ index++)
+ if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+ &tmp[index])) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fph */
+ if (dst->count > 0) {
+ ia64_flush_fph(dst->target);
+ if (task->thread.flags & IA64_THREAD_FPH_VALID)
+ dst->ret = user_regset_copyout(
+ &dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ &dst->target->thread.fph,
+ ELF_FP_OFFSET(32), -1);
+ else
+ /* Zero fill instead. */
+ dst->ret = user_regset_copyout_zero(
+ &dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ ELF_FP_OFFSET(32), -1);
+ }
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ elf_fpreg_t fpreg, tmp[30];
+ int index, start, end;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip pos 0 and 1 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_FP_OFFSET(2));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fr2-fr31 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+ start = dst->pos;
+ end = min(((unsigned int)ELF_FP_OFFSET(32)),
+ dst->pos + dst->count);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+ if (dst->ret)
+ return;
+
+ if (start & 0xF) { /* only write high part */
+ if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+ = fpreg.u.bits[0];
+ start &= ~0xFUL;
+ }
+ if (end & 0xF) { /* only write low part */
+ if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+ = fpreg.u.bits[1];
+ end = (end + 0xF) & ~0xFUL;
+ }
+
+ for ( ; start < end ; start += sizeof(elf_fpreg_t)) {
+ index = start / sizeof(elf_fpreg_t);
+ if (unw_set_fr(info, index, tmp[index - 2])) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* fph */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+ ia64_sync_fph(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ &dst->target->thread.fph,
+ ELF_FP_OFFSET(32), -1);
+ }
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+ struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct regset_getset info = { .target = target, .regset = regset,
+ .pos = pos, .count = count,
+ .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+ .ret = 0 };
+
+ if (target == current)
+ unw_init_running(call, &info);
+ else {
+ struct unw_frame_info ufi;
+ memset(&ufi, 0, sizeof(ufi));
+ unw_init_from_blocked_task(&ufi, target);
+ (*call)(&ufi, &info);
+ }
+
+ return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_gpregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_gpregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+ do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+ const struct user_regset *regset,
+ int now)
+{
+ if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+ return 0;
+ tsk_set_notify_resume(target);
+ return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+ NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+ unsigned long *data, int write_access)
+{
+ unsigned int pos = -1; /* an invalid value */
+ int ret;
+ unsigned long *ptr, regnum;
+
+ if ((addr & 0x7) != 0) {
+ dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+ return -1;
+ }
+ if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+ (addr >= PT_R7 + 8 && addr < PT_B1) ||
+ (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+ (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+
+ switch (addr) {
+ case PT_F32 ... (PT_F127 + 15):
+ pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+ break;
+ case PT_F2 ... (PT_F5 + 15):
+ pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+ break;
+ case PT_F10 ... (PT_F31 + 15):
+ pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+ break;
+ case PT_F6 ... (PT_F9 + 15):
+ pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = fpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = fpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ switch (addr) {
+ case PT_NAT_BITS:
+ pos = ELF_NAT_OFFSET;
+ break;
+ case PT_R4 ... PT_R7:
+ pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+ break;
+ case PT_B1 ... PT_B5:
+ pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+ break;
+ case PT_AR_EC:
+ pos = ELF_AR_EC_OFFSET;
+ break;
+ case PT_AR_LC:
+ pos = ELF_AR_LC_OFFSET;
+ break;
+ case PT_CR_IPSR:
+ pos = ELF_CR_IPSR_OFFSET;
+ break;
+ case PT_CR_IIP:
+ pos = ELF_CR_IIP_OFFSET;
+ break;
+ case PT_CFM:
+ pos = ELF_CFM_OFFSET;
+ break;
+ case PT_AR_UNAT:
+ pos = ELF_AR_UNAT_OFFSET;
+ break;
+ case PT_AR_PFS:
+ pos = ELF_AR_PFS_OFFSET;
+ break;
+ case PT_AR_RSC:
+ pos = ELF_AR_RSC_OFFSET;
+ break;
+ case PT_AR_RNAT:
+ pos = ELF_AR_RNAT_OFFSET;
+ break;
+ case PT_AR_BSPSTORE:
+ pos = ELF_AR_BSPSTORE_OFFSET;
+ break;
+ case PT_PR:
+ pos = ELF_PR_OFFSET;
+ break;
+ case PT_B6:
+ pos = ELF_BR_OFFSET(6);
+ break;
+ case PT_AR_BSP:
+ pos = ELF_AR_BSP_OFFSET;
+ break;
+ case PT_R1 ... PT_R3:
+ pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+ break;
+ case PT_R12 ... PT_R15:
+ pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+ break;
+ case PT_R8 ... PT_R11:
+ pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+ break;
+ case PT_R16 ... PT_R31:
+ pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+ break;
+ case PT_AR_CCV:
+ pos = ELF_AR_CCV_OFFSET;
+ break;
+ case PT_AR_FPSR:
+ pos = ELF_AR_FPSR_OFFSET;
+ break;
+ case PT_B0:
+ pos = ELF_BR_OFFSET(0);
+ break;
+ case PT_B7:
+ pos = ELF_BR_OFFSET(7);
+ break;
+ case PT_AR_CSD:
+ pos = ELF_AR_CSD_OFFSET;
+ break;
+ case PT_AR_SSD:
+ pos = ELF_AR_SSD_OFFSET;
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = gpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = gpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ /* access debug registers */
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ ptr = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ ptr = &child->thread.dbr[0];
+ }
+
+ if (regnum >= 8) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+#ifdef CONFIG_PERFMON
+ /*
+ * Check if debug registers are used by perfmon. This
+ * test must be done once we know that we can do the
+ * operation, i.e. the arguments are all valid, but
+ * before we start modifying the state.
+ *
+ * Perfmon needs to keep a count of how many processes
+ * are trying to modify the debug registers for system
+ * wide monitoring sessions.
+ *
+ * We also include read access here, because they may
+ * cause the PMU-installed debug register state
+ * (dbr[], ibr[]) to be reset. The two arrays are also
+ * used by perfmon, but we do not use
+ * IA64_THREAD_DBG_VALID. The registers are restored
+ * by the PMU context switch code.
+ */
+ if (pfm_use_debug_registers(child))
+ return -1;
+#endif
+
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0,
+ sizeof(child->thread.dbr));
+ memset(child->thread.ibr, 0,
+ sizeof(child->thread.ibr));
+ }
+
+ ptr += regnum;
+
+ if ((regnum & 1) && write_access) {
+ /* don't let the user set kernel-level breakpoints: */
+ *ptr = *data & ~(7UL << 56);
+ return 0;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG,
+ .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+ .get = gpregs_get, .set = gpregs_set,
+ .writeback = gpregs_writeback
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = ELF_NFPREG,
+ .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+ .get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+ },
+};
+
+static const struct user_regset_view user_ia64_view = {
+ .name = "ia64",
+ .e_machine = EM_IA_64,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ extern const struct user_regset_view user_ia32_view;
+ if (IS_IA32_PROCESS(task_pt_regs(tsk)))
+ return &user_ia32_view;
+#endif
+ return &user_ia64_view;
+}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index a3022dc48ef8..0464173ea568 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -229,6 +229,14 @@ static void __init sal_desc_ap_wakeup(void *p) { }
*/
static int sal_cache_flush_drops_interrupts;
+static int __init
+force_pal_cache_flush(char *str)
+{
+ sal_cache_flush_drops_interrupts = 1;
+ return 0;
+}
+early_param("force_pal_cache_flush", force_pal_cache_flush);
+
void __init
check_sal_cache_flush (void)
{
@@ -237,15 +245,17 @@ check_sal_cache_flush (void)
u64 vector, cache_type = 3;
struct ia64_sal_retval isrv;
+ if (sal_cache_flush_drops_interrupts)
+ return;
+
cpu = get_cpu();
local_irq_save(flags);
/*
- * Schedule a timer interrupt, wait until it's reported, and see if
- * SAL_CACHE_FLUSH drops it.
+ * Send ourselves a timer interrupt, wait until it's reported, and see
+ * if SAL_CACHE_FLUSH drops it.
*/
- ia64_set_itv(IA64_TIMER_VECTOR);
- ia64_set_itm(ia64_get_itc() + 1000);
+ platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);
while (!ia64_get_irr(IA64_TIMER_VECTOR))
cpu_relax();
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 779c3cca206c..ecb9eb78d687 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -44,8 +44,8 @@
#include <linux/smp.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
+#include <linux/semaphore.h>
-#include <asm/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>
@@ -648,18 +648,16 @@ salinfo_init(void)
if (!dir)
continue;
- entry = create_proc_entry("event", S_IRUSR, dir);
+ entry = proc_create_data("event", S_IRUSR, dir,
+ &salinfo_event_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_event_fops;
*sdir++ = entry;
- entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+ entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+ &salinfo_data_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_data_fops;
*sdir++ = entry;
/* we missed any events before now */
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3fbae2..000000000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- * - Only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - When we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleepers" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
- unsigned long flags;
- int sleepers;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4aa9eaea76c3..4ae15c8c2488 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/system.h>
+#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/hpsim.h>
@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(unsigned long start, unsigned long end, void *arg)
+{
+ void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+ if (start == PAGE_OFFSET) {
+ printk(KERN_WARNING "warning: skipping physical page 0\n");
+ start += PAGE_SIZE;
+ if (start >= end)
+ return 0;
+ }
+#endif
+ func = arg;
+ if (start < end)
+ call_pernode_memory(__pa(start), end - start, func);
+ return 0;
+}
+
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
@@ -215,6 +239,25 @@ __initcall(register_memory);
#ifdef CONFIG_KEXEC
+
+/*
+ * This function checks if the reserved crashkernel is allowed on the specific
+ * IA64 machine flavour. Machines without an IO TLB use swiotlb and require
+ * some memory below 4 GB (i.e. in 32 bit area), see the implementation of
+ * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
+ * in kdump case. See the comment in sba_init() in sba_iommu.c.
+ *
+ * So, the only machvec that really supports loading the kdump kernel
+ * over 4 GB is "sn2".
+ */
+static int __init check_crashkernel_memory(unsigned long pbase, size_t size)
+{
+ if (ia64_platform_is("sn2") || ia64_platform_is("uv"))
+ return 1;
+ else
+ return pbase < (1UL << 32);
+}
+
static void __init setup_crashkernel(unsigned long total, int *n)
{
unsigned long long base = 0, size = 0;
@@ -228,6 +271,16 @@ static void __init setup_crashkernel(unsigned long total, int *n)
base = kdump_find_rsvd_region(size,
rsvd_region, *n);
}
+
+ if (!check_crashkernel_memory(base, size)) {
+ pr_warning("crashkernel: There would be kdump memory "
+ "at %ld GB but this is unusable because it "
+ "must\nbe below 4 GB. Change the memory "
+ "configuration of the machine.\n",
+ (unsigned long)(base >> 30));
+ return;
+ }
+
if (base != ~0UL) {
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -493,6 +546,8 @@ setup_arch (char **cmdline_p)
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
+ per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+ 32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
# endif
#else
# ifdef CONFIG_SMP
@@ -505,6 +560,17 @@ setup_arch (char **cmdline_p)
/* process SAL system table: */
ia64_sal_init(__va(efi.sal_systab));
+#ifdef CONFIG_ITANIUM
+ ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+#else
+ {
+ u64 num_phys_stacked;
+
+ if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
+ ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+ }
+#endif
+
#ifdef CONFIG_SMP
cpu_physical_id(0) = hard_smp_processor_id();
#endif
@@ -512,8 +578,6 @@ setup_arch (char **cmdline_p)
cpu_init(); /* initialize the bootstrap CPU */
mmu_context_init(); /* initialize context_id bitmap */
- check_sal_cache_flush();
-
#ifdef CONFIG_ACPI
acpi_boot_init();
#endif
@@ -541,6 +605,7 @@ setup_arch (char **cmdline_p)
ia64_mca_init();
platform_setup(cmdline_p);
+ check_sal_cache_flush();
paging_init();
}
@@ -946,9 +1011,10 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
- if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
- else {
+ setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+ } else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 5740296c35af..19c5a78636fc 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -464,7 +464,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
if (!user_mode(&scr->pt))
return;
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK)
oldset = &current->saved_sigmask;
else
oldset = &current->blocked;
@@ -530,12 +530,13 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
* continue to iterate in this loop so we can deliver the SIGSEGV...
*/
if (handle_signal(signr, &ka, &info, oldset, scr)) {
- /* a signal was successfully delivered; the saved
+ /*
+ * A signal was successfully delivered; the saved
* sigmask will have been stored in the signal frame,
* and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag */
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- clear_thread_flag(TIF_RESTORE_SIGMASK);
+ * clear the TS_RESTORE_SIGMASK flag.
+ */
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
return;
}
}
@@ -566,8 +567,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
/* if there's no signal to deliver, we just put the saved sigmask
* back */
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 4e446aa5f4ac..983296f1c813 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -98,8 +98,33 @@ unlock_ipi_calllock(void)
spin_unlock_irq(&call_lock);
}
+static inline void
+handle_call_data(void)
+{
+ struct call_data_struct *data;
+ void (*func)(void *info);
+ void *info;
+ int wait;
+
+ /* release the 'pointer lock' */
+ data = (struct call_data_struct *)call_data;
+ func = data->func;
+ info = data->info;
+ wait = data->wait;
+
+ mb();
+ atomic_inc(&data->started);
+ /* At this point the structure may be gone unless wait is true. */
+ (*func)(info);
+
+ /* Notify the sending CPU that the task is done. */
+ mb();
+ if (wait)
+ atomic_inc(&data->finished);
+}
+
static void
-stop_this_cpu (void)
+stop_this_cpu(void)
{
/*
* Remove this CPU:
@@ -138,44 +163,21 @@ handle_IPI (int irq, void *dev_id)
ops &= ~(1 << which);
switch (which) {
- case IPI_CALL_FUNC:
- {
- struct call_data_struct *data;
- void (*func)(void *info);
- void *info;
- int wait;
-
- /* release the 'pointer lock' */
- data = (struct call_data_struct *) call_data;
- func = data->func;
- info = data->info;
- wait = data->wait;
-
- mb();
- atomic_inc(&data->started);
- /*
- * At this point the structure may be gone unless
- * wait is true.
- */
- (*func)(info);
-
- /* Notify the sending CPU that the task is done. */
- mb();
- if (wait)
- atomic_inc(&data->finished);
- }
- break;
-
- case IPI_CPU_STOP:
+ case IPI_CALL_FUNC:
+ handle_call_data();
+ break;
+
+ case IPI_CPU_STOP:
stop_this_cpu();
break;
#ifdef CONFIG_KEXEC
- case IPI_KDUMP_CPU_STOP:
+ case IPI_KDUMP_CPU_STOP:
unw_init_running(kdump_cpu_freeze, NULL);
break;
#endif
- default:
- printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+ default:
+ printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+ this_cpu, which);
break;
}
} while (ops);
@@ -213,6 +215,19 @@ send_IPI_allbutself (int op)
* Called with preemption disabled.
*/
static inline void
+send_IPI_mask(cpumask_t mask, int op)
+{
+ unsigned int cpu;
+
+ for_each_cpu_mask(cpu, mask) {
+ send_IPI_single(cpu, op);
+ }
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
send_IPI_all (int op)
{
int i;
@@ -401,6 +416,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
}
EXPORT_SYMBOL(smp_call_function_single);
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * <mask> The set of cpus to run on. Must not include the current cpu.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <wait> If true, wait (atomically) until function
+ * has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
+{
+ struct call_data_struct data;
+ cpumask_t allbutself;
+ int cpus;
+
+ spin_lock(&call_lock);
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+ if (!cpus) {
+ spin_unlock(&call_lock);
+ return 0;
+ }
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(IPI_CALL_FUNC);
+ else
+ send_IPI_mask(mask, IPI_CALL_FUNC);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+ call_data = NULL;
+
+ spin_unlock(&call_lock);
+ return 0;
+
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 32ee5979a042..d7ad42b77d41 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -400,9 +400,9 @@ smp_callin (void)
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
cpu_set(cpuid, cpu_online_map);
- unlock_ipi_calllock();
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
+ unlock_ipi_calllock();
smp_setup_percpu_timer();
@@ -873,7 +873,8 @@ identify_siblings(struct cpuinfo_ia64 *c)
u16 pltid;
pal_logical_to_physical_t info;
- if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
+ status = ia64_pal_logical_to_phys(-1, &info);
+ if (status != PAL_STATUS_SUCCESS) {
if (status != PAL_STATUS_UNIMPLEMENTED) {
printk(KERN_ERR
"ia64_pal_logical_to_phys failed with %ld\n",
@@ -885,8 +886,13 @@ identify_siblings(struct cpuinfo_ia64 *c)
info.overview_cpp = 1;
info.overview_tpc = 1;
}
- if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+
+ status = ia64_sal_physical_id_info(&pltid);
+ if (status != PAL_STATUS_SUCCESS) {
+ if (status != PAL_STATUS_UNIMPLEMENTED)
+ printk(KERN_ERR
+ "ia64_sal_pltid failed with %ld\n",
+ status);
return;
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 17fda5293c67..8c73643f2d66 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+ struct thread_info *pi = task_thread_info(prev);
+ struct thread_info *ni = task_thread_info(next);
+ cputime_t delta_stime, delta_utime;
+ __u64 now;
+
+ now = ia64_get_itc();
+
+ delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+ account_system_time(prev, 0, delta_stime);
+ account_system_time_scaled(prev, delta_stime);
+
+ if (pi->ac_utime) {
+ delta_utime = cycle_to_cputime(pi->ac_utime);
+ account_user_time(prev, delta_utime);
+ account_user_time_scaled(prev, delta_utime);
+ }
+
+ pi->ac_stamp = ni->ac_stamp = now;
+ ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ unsigned long flags;
+ cputime_t delta_stime;
+ __u64 now;
+
+ local_irq_save(flags);
+
+ now = ia64_get_itc();
+
+ delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+ account_system_time(tsk, 0, delta_stime);
+ account_system_time_scaled(tsk, delta_stime);
+ ti->ac_stime = 0;
+
+ ti->ac_stamp = now;
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process. Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+ struct thread_info *ti = task_thread_info(p);
+ cputime_t delta_utime;
+
+ if (ti->ac_utime) {
+ delta_utime = cycle_to_cputime(ti->ac_utime);
+ account_user_time(p, delta_utime);
+ account_user_time_scaled(p, delta_utime);
+ ti->ac_utime = 0;
+ }
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
@@ -301,11 +379,6 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
-void __devinit ia64_disable_timer(void)
-{
- ia64_set_itv(1 << 16);
-}
-
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index a2484fc1a06c..26228e2d01ae 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -27,9 +27,20 @@
static struct ia64_cpu *sysfs_cpus;
-int arch_register_cpu(int num)
+void arch_fix_phys_package_id(int num, u32 slot)
{
-#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
+#ifdef CONFIG_SMP
+ if (cpu_data(num)->socket_id == -1)
+ cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
/*
* If CPEI can be re-targetted or if this is not
* CPEI target, then it is hotpluggable
@@ -38,19 +49,21 @@ int arch_register_cpu(int num)
sysfs_cpus[num].cpu.hotpluggable = 1;
map_cpu_to_node(num, node_cpuid[num].nid);
#endif
-
return register_cpu(&sysfs_cpus[num].cpu, num);
}
-
-#ifdef CONFIG_HOTPLUG_CPU
+EXPORT_SYMBOL(arch_register_cpu);
void arch_unregister_cpu(int num)
{
unregister_cpu(&sysfs_cpus[num].cpu);
unmap_cpu_from_node(num, cpu_to_node(num));
}
-EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+static int __init arch_register_cpu(int num)
+{
+ return register_cpu(&sysfs_cpus[num].cpu, num);
+}
#endif /*CONFIG_HOTPLUG_CPU*/
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 6903361d11a5..ff0e7c10faa7 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -13,6 +13,7 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
+#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/tty.h>
@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
{
static unsigned long count, last_time;
- if (jiffies - last_time > 5*HZ)
+ if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (count < 5) {
last_time = jiffies;
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 2a90c32024f4..e77995a6e3ed 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License
@@ -177,12 +177,13 @@ failed:
* uncached_alloc_page
*
* @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
*
- * Allocate 1 uncached page. Allocates on the requested node. If no
- * uncached pages are available on the requested node, roundrobin starting
- * with the next higher node.
+ * Allocate the specified number of contiguous uncached pages on the
+ * the requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
*/
-unsigned long uncached_alloc_page(int starting_nid)
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
{
unsigned long uc_addr;
struct uncached_pool *uc_pool;
@@ -202,7 +203,8 @@ unsigned long uncached_alloc_page(int starting_nid)
if (uc_pool->pool == NULL)
continue;
do {
- uc_addr = gen_pool_alloc(uc_pool->pool, PAGE_SIZE);
+ uc_addr = gen_pool_alloc(uc_pool->pool,
+ n_pages * PAGE_SIZE);
if (uc_addr != 0)
return uc_addr;
} while (uncached_add_chunk(uc_pool, nid) == 0);
@@ -217,11 +219,12 @@ EXPORT_SYMBOL(uncached_alloc_page);
/*
* uncached_free_page
*
- * @uc_addr: uncached address of page to free
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
*
- * Free a single uncached page.
+ * Free the specified number of uncached pages.
*/
-void uncached_free_page(unsigned long uc_addr)
+void uncached_free_page(unsigned long uc_addr, int n_pages)
{
int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
struct gen_pool *pool = uncached_pools[nid].pool;
@@ -232,7 +235,7 @@ void uncached_free_page(unsigned long uc_addr)
if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
panic("uncached_free_page invalid address %lx\n", uc_addr);
- gen_pool_free(pool, uc_addr, PAGE_SIZE);
+ gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
}
EXPORT_SYMBOL(uncached_free_page);
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 80622acc95de..5929ab10a289 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -156,6 +156,13 @@ SECTIONS
__end___vtop_patchlist = .;
}
+ .data.patch.rse : AT(ADDR(.data.patch.rse) - LOAD_OFFSET)
+ {
+ __start___rse_patchlist = .;
+ *(.data.patch.rse)
+ __end___rse_patchlist = .;
+ }
+
.data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
{
__start___mckinley_e9_bundles = .;