diff options
author | Dave Airlie <airlied@starflyer.(none)> | 2006-01-03 18:18:01 +1100 |
---|---|---|
committer | Dave Airlie <airlied@linux.ie> | 2006-01-03 18:18:01 +1100 |
commit | 97f2aab6698f3ab2552c41c1024a65ffd0763a6d (patch) | |
tree | bb6e3b2949459f54f884c710fc74d40eef00d834 /arch/powerpc | |
parent | d985c1088146607532093d9eaaaf99758f6a4d21 (diff) | |
parent | 88026842b0a760145aa71d69e74fbc9ec118ca44 (diff) |
drm: merge in Linus mainline
Diffstat (limited to 'arch/powerpc')
157 files changed, 18745 insertions, 2049 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1493c7896fe3..db93dbc0e21a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -227,7 +227,7 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-32)" + int "Maximum number of CPUs (2-128)" range 2 128 depends on SMP default "32" if PPC64 @@ -261,7 +261,7 @@ config PPC_ISERIES config EMBEDDED6xx bool "Embedded 6xx/7xx/7xxx-based board" - depends on PPC32 + depends on PPC32 && BROKEN config APUS bool "Amiga-APUS" @@ -305,7 +305,7 @@ config PPC_PMAC64 config PPC_PREP bool " PowerPC Reference Platform (PReP) based machines" - depends on PPC_MULTIPLATFORM && PPC32 + depends on PPC_MULTIPLATFORM && PPC32 && BROKEN select PPC_I8259 select PPC_INDIRECT_PCI default y @@ -581,17 +581,12 @@ config ARCH_FLATMEM_ENABLE def_bool y depends on PPC64 && !NUMA -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y - depends on SMP && PPC_PSERIES -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config ARCH_SPARSEMEM_ENABLE - def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE + depends on SMP && PPC_PSERIES source "mm/Kconfig" @@ -599,18 +594,9 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NEED_MULTIPLE_NODES -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. -# -# This is a relatively temporary hack that should -# be able to go away when sparsemem is fully in -# place - -config NODES_SPAN_OTHER_NODES +config ARCH_MEMORY_PROBE def_bool y - depends on NEED_MULTIPLE_NODES + depends on MEMORY_HOTPLUG config PPC_64K_PAGES bool "64k page size" @@ -933,6 +919,7 @@ source "arch/powerpc/oprofile/Kconfig" config KPROBES bool "Kprobes (EXPERIMENTAL)" + depends on PPC64 help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5bc11bd36c1f..a13eb575f834 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -14,10 +14,6 @@ HAS_BIARCH := $(call cc-option-yn, -m32) -ifeq ($(CONFIG_PPC64),y) -OLDARCH := ppc64 -SZ := 64 - # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= @@ -37,6 +33,12 @@ endif export CROSS32CC CROSS32AS CROSS32LD CROSS32OBJCOPY +KBUILD_DEFCONFIG := $(shell uname -m)_defconfig + +ifeq ($(CONFIG_PPC64),y) +OLDARCH := ppc64 +SZ := 64 + new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) ifeq ($(new_nm),y) @@ -59,15 +61,17 @@ endif LDFLAGS_vmlinux := -Bstatic # The -Iarch/$(ARCH)/include is temporary while we are merging -CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH)/include -AFLAGS += -Iarch/$(ARCH) -CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe +CPPFLAGS-$(CONFIG_PPC32) := -Iarch/$(ARCH) -Iarch/$(ARCH)/include +AFLAGS-$(CONFIG_PPC32) := -Iarch/$(ARCH) CFLAGS-$(CONFIG_PPC64) := -mminimal-toc -mtraceback=none -mcall-aixdesc -CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 -mmultiple -CFLAGS += $(CFLAGS-y) +CFLAGS-$(CONFIG_PPC32) := -Iarch/$(ARCH) -ffixed-r2 -mmultiple +CPPFLAGS += $(CPPFLAGS-y) +AFLAGS += $(AFLAGS-y) +CFLAGS += -msoft-float -pipe $(CFLAGS-y) CPP = $(CC) -E $(CFLAGS) # Temporary hack until we have migrated to asm-powerpc -LINUXINCLUDE += -Iarch/$(ARCH)/include +LINUXINCLUDE-$(CONFIG_PPC32) := -Iarch/$(ARCH)/include +LINUXINCLUDE += $(LINUXINCLUDE-y) CHECKFLAGS += -m$(SZ) -D__powerpc__ -D__powerpc$(SZ)__ @@ -111,9 +115,6 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200 AFLAGS += $(cpu-as-y) CFLAGS += $(cpu-as-y) -# Default to the common case. -KBUILD_DEFCONFIG := common_defconfig - head-y := arch/powerpc/kernel/head_32.o head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o @@ -125,11 +126,11 @@ head-$(CONFIG_PPC64) += arch/powerpc/kernel/entry_64.o head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o core-y += arch/powerpc/kernel/ \ - arch/$(OLDARCH)/kernel/ \ arch/powerpc/mm/ \ arch/powerpc/lib/ \ arch/powerpc/sysdev/ \ arch/powerpc/platforms/ +core-$(CONFIG_PPC32) += arch/ppc/kernel/ core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ core-$(CONFIG_APUS) += arch/ppc/amiga/ @@ -139,7 +140,7 @@ drivers-$(CONFIG_CPM2) += arch/ppc/8260_io/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ -defaultimage-$(CONFIG_PPC32) := uImage zImage +defaultimage-$(CONFIG_PPC32) := zImage defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux defaultimage-$(CONFIG_PPC_PSERIES) := zImage KBUILD_IMAGE := $(defaultimage-y) @@ -154,42 +155,33 @@ BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm .PHONY: $(BOOT_TARGETS) -boot := arch/$(OLDARCH)/boot +boot := arch/$(ARCH)/boot -# urk -ifeq ($(CONFIG_PPC64),y) $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) -else -$(BOOT_TARGETS): vmlinux - $(Q)$(MAKE) ARCH=ppc $(build)=$(boot) $@ -endif - -uImage: vmlinux - $(Q)$(MAKE) ARCH=$(OLDARCH) $(build)=$(boot)/images $(boot)/images/$@ define archhelp - @echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/images/zImage.*)' - @echo ' uImage - Create a bootable image for U-Boot / PPCBoot' + @echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' @echo ' install - Install kernel using' @echo ' (your) ~/bin/installkernel or' @echo ' (distribution) /sbin/installkernel or' @echo ' install to $$(INSTALL_PATH) and run lilo' - @echo ' *_defconfig - Select default config from arch/$(ARCH)/ppc/configs' + @echo ' *_defconfig - Select default config from arch/$(ARCH)/configs' endef archclean: $(Q)$(MAKE) $(clean)=$(boot) - # Temporary hack until we have migrated to asm-powerpc $(Q)rm -rf arch/$(ARCH)/include archprepare: checkbin +ifeq ($(CONFIG_PPC32),y) # Temporary hack until we have migrated to asm-powerpc include/asm: arch/$(ARCH)/include/asm -arch/$(ARCH)/include/asm: +arch/$(ARCH)/include/asm: FORCE $(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi $(Q)ln -fsn $(srctree)/include/asm-$(OLDARCH) arch/$(ARCH)/include/asm +endif # Use the file '.tmp_gas_check' for binutils tests, as gas won't output # to stdout and these checks are run even on install targets. diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile new file mode 100644 index 000000000000..9770f587af73 --- /dev/null +++ b/arch/powerpc/boot/Makefile @@ -0,0 +1,149 @@ +# Makefile for making ELF bootable images for booting on CHRP +# using Open Firmware. +# +# Geert Uytterhoeven September 1997 +# +# Based on coffboot by Paul Mackerras +# Simplified for ppc64 by Todd Inglett +# +# NOTE: this code is built for 32 bit in ELF32 format even though +# it packages a 64 bit kernel. We do this to simplify the +# bootloader and increase compatibility with OpenFirmware. +# +# To this end we need to define BOOTCC, etc, as the tools +# needed to build the 32 bit image. These are normally HOSTCC, +# but may be a third compiler if, for example, you are cross +# compiling from an intel box. Once the 64bit ppc gcc is +# stable it will probably simply be a compiler switch to +# compile for 32bit mode. +# To make it easier to setup a cross compiler, +# CROSS32_COMPILE is setup as a prefix just like CROSS_COMPILE +# in the toplevel makefile. + + +HOSTCC := gcc +BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem \ + $(shell $(CROSS32CC) -print-file-name=include) -fPIC +BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc +BOOTLFLAGS := -T $(srctree)/$(src)/zImage.lds +OBJCOPYFLAGS := contents,alloc,load,readonly,data + +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) +#$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) + +src-boot := string.S prom.c main.c div64.S crt0.S +src-boot += $(zlib) +src-boot := $(addprefix $(obj)/, $(src-boot)) +obj-boot := $(addsuffix .o, $(basename $(src-boot))) + +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + +quiet_cmd_copy_zlib = COPY $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ + cmd_copy_zlibheader = sed "s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@<linux/string.h>@\"string.h\"@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlib) + +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + + +quiet_cmd_bootcc = BOOTCC $@ + cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + +quiet_cmd_bootas = BOOTAS $@ + cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + +quiet_cmd_bootld = BOOTLD $@ + cmd_bootld = $(CROSS32LD) $(BOOTLFLAGS) -o $@ $(2) + +$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c + $(call if_changed_dep,bootcc) +$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S + $(call if_changed_dep,bootas) + +#----------------------------------------------------------- +# ELF sections within the zImage bootloader/wrapper +#----------------------------------------------------------- +required := vmlinux.strip +initrd := initrd + +obj-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.o, $(section))) +src-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.c, $(section))) +gz-sec = $(foreach section, $(1), $(patsubst %,$(obj)/kernel-%.gz, $(section))) + +hostprogs-y := addnote addRamDisk +targets += zImage.vmode zImage.initrd.vmode zImage zImage.initrd \ + $(patsubst $(obj)/%,%, $(call obj-sec, $(required) $(initrd))) \ + $(patsubst $(obj)/%,%, $(call src-sec, $(required) $(initrd))) \ + $(patsubst $(obj)/%,%, $(call gz-sec, $(required) $(initrd))) \ + vmlinux.initrd +extra-y := initrd.o + +quiet_cmd_ramdisk = RAMDISK $@ + cmd_ramdisk = $(obj)/addRamDisk $(obj)/ramdisk.image.gz $< $@ + +quiet_cmd_stripvm = STRIP $@ + cmd_stripvm = $(STRIP) -s -R .comment $< -o $@ + +vmlinux.strip: vmlinux + $(call if_changed,stripvm) +$(obj)/vmlinux.initrd: vmlinux.strip $(obj)/addRamDisk $(obj)/ramdisk.image.gz + $(call if_changed,ramdisk) + +quiet_cmd_addsection = ADDSEC $@ + cmd_addsection = $(CROSS32OBJCOPY) $@ \ + --add-section=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(patsubst %.o,%.gz, $@) \ + --set-section-flags=.kernel:$(strip $(patsubst $(obj)/kernel-%.o,%, $@))=$(OBJCOPYFLAGS) + +quiet_cmd_addnote = ADDNOTE $@ + cmd_addnote = $(obj)/addnote $@ + +$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % + $(call if_changed,gzip) + +$(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz + cp -f $(obj)/ramdisk.image.gz $@ + +$(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz + @touch $@ + +$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c + $(call if_changed_dep,bootcc) + $(call cmd,addsection) + +$(obj)/zImage.vmode: obj-boot += $(call obj-sec, $(required)) +$(obj)/zImage.vmode: $(call obj-sec, $(required)) $(obj-boot) $(srctree)/$(src)/zImage.lds + $(call cmd,bootld,$(obj-boot)) + +$(obj)/zImage.initrd.vmode: obj-boot += $(call obj-sec, $(required) $(initrd)) +$(obj)/zImage.initrd.vmode: $(call obj-sec, $(required) $(initrd)) $(obj-boot) $(srctree)/$(src)/zImage.lds + $(call cmd,bootld,$(obj-boot)) + +$(obj)/zImage: $(obj)/zImage.vmode $(obj)/addnote + @cp -f $< $@ + $(call if_changed,addnote) + +$(obj)/zImage.initrd: $(obj)/zImage.initrd.vmode $(obj)/addnote + @cp -f $< $@ + $(call if_changed,addnote) + +install: $(CONFIGURE) $(BOOTIMAGE) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" "$(BOOTIMAGE)" + +clean-files := $(addprefix $(objtree)/, $(obj-boot) vmlinux.strip) diff --git a/arch/powerpc/boot/README b/arch/powerpc/boot/README new file mode 100644 index 000000000000..3e11058760e4 --- /dev/null +++ b/arch/powerpc/boot/README @@ -0,0 +1,11 @@ + +To extract the kernel vmlinux, System.map, .config or initrd from the zImage binary: + +objcopy -j .kernel:vmlinux -O binary zImage vmlinux.gz +objcopy -j .kernel:System.map -O binary zImage System.map.gz +objcopy -j .kernel:.config -O binary zImage config.gz +objcopy -j .kernel:initrd -O binary zImage.initrd initrd.gz + + + Peter + diff --git a/arch/powerpc/boot/addRamDisk.c b/arch/powerpc/boot/addRamDisk.c new file mode 100644 index 000000000000..c02a99952be7 --- /dev/null +++ b/arch/powerpc/boot/addRamDisk.c @@ -0,0 +1,311 @@ +#include <stdio.h> +#include <stdlib.h> +#include <netinet/in.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> +#include <elf.h> + +#define ElfHeaderSize (64 * 1024) +#define ElfPages (ElfHeaderSize / 4096) +#define KERNELBASE (0xc000000000000000) +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) + +struct addr_range { + unsigned long long addr; + unsigned long memsize; + unsigned long offset; +}; + +static int check_elf64(void *p, int size, struct addr_range *r) +{ + Elf64_Ehdr *elf64 = p; + Elf64_Phdr *elf64ph; + + if (elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || elf64->e_machine != EM_PPC64) + return 0; + + if ((elf64->e_phoff + sizeof(Elf64_Phdr)) > size) + return 0; + + elf64ph = (Elf64_Phdr *) ((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + + r->memsize = (unsigned long)elf64ph->p_memsz; + r->offset = (unsigned long)elf64ph->p_offset; + r->addr = (unsigned long long)elf64ph->p_vaddr; + +#ifdef DEBUG + printf("PPC64 ELF file, ph:\n"); + printf("p_type 0x%08x\n", elf64ph->p_type); + printf("p_flags 0x%08x\n", elf64ph->p_flags); + printf("p_offset 0x%016llx\n", elf64ph->p_offset); + printf("p_vaddr 0x%016llx\n", elf64ph->p_vaddr); + printf("p_paddr 0x%016llx\n", elf64ph->p_paddr); + printf("p_filesz 0x%016llx\n", elf64ph->p_filesz); + printf("p_memsz 0x%016llx\n", elf64ph->p_memsz); + printf("p_align 0x%016llx\n", elf64ph->p_align); + printf("... skipping 0x%08lx bytes of ELF header\n", + (unsigned long)elf64ph->p_offset); +#endif + + return 64; +} +void get4k(FILE *file, char *buf ) +{ + unsigned j; + unsigned num = fread(buf, 1, 4096, file); + for ( j=num; j<4096; ++j ) + buf[j] = 0; +} + +void put4k(FILE *file, char *buf ) +{ + fwrite(buf, 1, 4096, file); +} + +void death(const char *msg, FILE *fdesc, const char *fname) +{ + fprintf(stderr, msg); + fclose(fdesc); + unlink(fname); + exit(1); +} + +int main(int argc, char **argv) +{ + char inbuf[4096]; + struct addr_range vmlinux; + FILE *ramDisk; + FILE *inputVmlinux; + FILE *outputVmlinux; + + char *rd_name, *lx_name, *out_name; + + size_t i; + unsigned long ramFileLen; + unsigned long ramLen; + unsigned long roundR; + unsigned long offset_end; + + unsigned long kernelLen; + unsigned long actualKernelLen; + unsigned long round; + unsigned long roundedKernelLen; + unsigned long ramStartOffs; + unsigned long ramPages; + unsigned long roundedKernelPages; + unsigned long hvReleaseData; + u_int32_t eyeCatcher = 0xc8a5d9c4; + unsigned long naca; + unsigned long xRamDisk; + unsigned long xRamDiskSize; + long padPages; + + + if (argc < 2) { + fprintf(stderr, "Name of RAM disk file missing.\n"); + exit(1); + } + rd_name = argv[1]; + + if (argc < 3) { + fprintf(stderr, "Name of vmlinux file missing.\n"); + exit(1); + } + lx_name = argv[2]; + + if (argc < 4) { + fprintf(stderr, "Name of vmlinux output file missing.\n"); + exit(1); + } + out_name = argv[3]; + + + ramDisk = fopen(rd_name, "r"); + if ( ! ramDisk ) { + fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", rd_name); + exit(1); + } + + inputVmlinux = fopen(lx_name, "r"); + if ( ! inputVmlinux ) { + fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", lx_name); + exit(1); + } + + outputVmlinux = fopen(out_name, "w+"); + if ( ! outputVmlinux ) { + fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", out_name); + exit(1); + } + + i = fread(inbuf, 1, sizeof(inbuf), inputVmlinux); + if (i != sizeof(inbuf)) { + fprintf(stderr, "can not read vmlinux file %s: %u\n", lx_name, i); + exit(1); + } + + i = check_elf64(inbuf, sizeof(inbuf), &vmlinux); + if (i == 0) { + fprintf(stderr, "You must have a linux kernel specified as argv[2]\n"); + exit(1); + } + + /* Input Vmlinux file */ + fseek(inputVmlinux, 0, SEEK_END); + kernelLen = ftell(inputVmlinux); + fseek(inputVmlinux, 0, SEEK_SET); + printf("kernel file size = %lu\n", kernelLen); + + actualKernelLen = kernelLen - ElfHeaderSize; + + printf("actual kernel length (minus ELF header) = %lu\n", actualKernelLen); + + round = actualKernelLen % 4096; + roundedKernelLen = actualKernelLen; + if ( round ) + roundedKernelLen += (4096 - round); + printf("Vmlinux length rounded up to a 4k multiple = %ld/0x%lx \n", roundedKernelLen, roundedKernelLen); + roundedKernelPages = roundedKernelLen / 4096; + printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); + + offset_end = _ALIGN_UP(vmlinux.memsize, 4096); + /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */ + padPages = offset_end/4096 - roundedKernelPages; + + /* Check and see if the vmlinux is already larger than _end in System.map */ + if (padPages < 0) { + /* vmlinux is larger than _end - adjust the offset to the start of the embedded ram disk */ + offset_end = roundedKernelLen; + printf("vmlinux is larger than _end indicates it needs to be - offset_end = %lx \n", offset_end); + padPages = 0; + printf("will insert %lx pages between the vmlinux and the start of the ram disk \n", padPages); + } + else { + /* _end is larger than vmlinux - use the offset to _end that we calculated from the system map */ + printf("vmlinux is smaller than _end indicates is needed - offset_end = %lx \n", offset_end); + printf("will insert %lx pages between the vmlinux and the start of the ram disk \n", padPages); + } + + + + /* Input Ram Disk file */ + // Set the offset that the ram disk will be started at. + ramStartOffs = offset_end; /* determined from the input vmlinux file and the system map */ + printf("Ram Disk will start at offset = 0x%lx \n", ramStartOffs); + + fseek(ramDisk, 0, SEEK_END); + ramFileLen = ftell(ramDisk); + fseek(ramDisk, 0, SEEK_SET); + printf("%s file size = %ld/0x%lx \n", rd_name, ramFileLen, ramFileLen); + + ramLen = ramFileLen; + + roundR = 4096 - (ramLen % 4096); + if ( roundR ) { + printf("Rounding RAM disk file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); + ramLen += roundR; + } + + printf("Rounded RAM disk size is %ld/0x%lx \n", ramLen, ramLen); + ramPages = ramLen / 4096; + printf("RAM disk pages to copy = %ld/0x%lx\n", ramPages, ramPages); + + + + // Copy 64K ELF header + for (i=0; i<(ElfPages); ++i) { + get4k( inputVmlinux, inbuf ); + put4k( outputVmlinux, inbuf ); + } + + /* Copy the vmlinux (as full pages). */ + fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); + for ( i=0; i<roundedKernelPages; ++i ) { + get4k( inputVmlinux, inbuf ); + put4k( outputVmlinux, inbuf ); + } + + /* Insert pad pages (if appropriate) that are needed between */ + /* | the end of the vmlinux and the ram disk. */ + for (i=0; i<padPages; ++i) { + memset(inbuf, 0, 4096); + put4k(outputVmlinux, inbuf); + } + + /* Copy the ram disk (as full pages). */ + for ( i=0; i<ramPages; ++i ) { + get4k( ramDisk, inbuf ); + put4k( outputVmlinux, inbuf ); + } + + /* Close the input files */ + fclose(ramDisk); + fclose(inputVmlinux); + /* And flush the written output file */ + fflush(outputVmlinux); + + + + /* Fixup the new vmlinux to contain the ram disk starting offset (xRamDisk) and the ram disk size (xRamDiskSize) */ + /* fseek to the hvReleaseData pointer */ + fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET); + if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) { + death("Could not read hvReleaseData pointer\n", outputVmlinux, out_name); + } + hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */ + printf("hvReleaseData is at %08lx\n", hvReleaseData); + + /* fseek to the hvReleaseData */ + fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET); + if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) { + death("Could not read hvReleaseData\n", outputVmlinux, out_name); + } + /* Check hvReleaseData sanity */ + if (memcmp(inbuf, &eyeCatcher, 4) != 0) { + death("hvReleaseData is invalid\n", outputVmlinux, out_name); + } + /* Get the naca pointer */ + naca = ntohl(*((u_int32_t*) &inbuf[0x0C])) - KERNELBASE; + printf("Naca is at offset 0x%lx \n", naca); + + /* fseek to the naca */ + fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); + if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) { + death("Could not read naca\n", outputVmlinux, out_name); + } + xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c])); + xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14])); + /* Make sure a RAM disk isn't already present */ + if ((xRamDisk != 0) || (xRamDiskSize != 0)) { + death("RAM disk is already attached to this kernel\n", outputVmlinux, out_name); + } + /* Fill in the values */ + *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs); + *((u_int32_t *) &inbuf[0x14]) = htonl(ramPages); + + /* Write out the new naca */ + fflush(outputVmlinux); + fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); + if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) { + death("Could not write naca\n", outputVmlinux, out_name); + } + printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08lx\n", + ramPages, ramStartOffs); + + /* Done */ + fclose(outputVmlinux); + /* Set permission to executable */ + chmod(out_name, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); + + return 0; +} + diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c new file mode 100644 index 000000000000..8041a9845ab7 --- /dev/null +++ b/arch/powerpc/boot/addnote.c @@ -0,0 +1,205 @@ +/* + * Program to hack in a PT_NOTE program header entry in an ELF file. + * This is needed for OF on RS/6000s to load an image correctly. + * Note that OF needs a program header entry for the note, not an + * ELF section. + * + * Copyright 2000 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Usage: addnote zImage + */ +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> + +/* CHRP note section */ +char arch[] = "PowerPC"; + +#define N_DESCR 6 +unsigned int descr[N_DESCR] = { + 0xffffffff, /* real-mode = true */ + 0x00c00000, /* real-base, i.e. where we expect OF to be */ + 0xffffffff, /* real-size */ + 0xffffffff, /* virt-base */ + 0xffffffff, /* virt-size */ + 0x4000, /* load-base */ +}; + +/* RPA note section */ +char rpaname[] = "IBM,RPA-Client-Config"; + +/* + * Note: setting ignore_my_client_config *should* mean that OF ignores + * all the other fields, but there is a firmware bug which means that + * it looks at the splpar field at least. So these values need to be + * reasonable. + */ +#define N_RPA_DESCR 8 +unsigned int rpanote[N_RPA_DESCR] = { + 0, /* lparaffinity */ + 64, /* min_rmo_size */ + 0, /* min_rmo_percent */ + 40, /* max_pft_size */ + 1, /* splpar */ + -1, /* min_load */ + 0, /* new_mem_def */ + 1, /* ignore_my_client_config */ +}; + +#define ROUNDUP(len) (((len) + 3) & ~3) + +unsigned char buf[512]; + +#define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1])) +#define GET_32BE(off) ((GET_16BE(off) << 16) + GET_16BE((off)+2)) + +#define PUT_16BE(off, v) (buf[off] = ((v) >> 8) & 0xff, \ + buf[(off) + 1] = (v) & 0xff) +#define PUT_32BE(off, v) (PUT_16BE((off), (v) >> 16), \ + PUT_16BE((off) + 2, (v))) + +/* Structure of an ELF file */ +#define E_IDENT 0 /* ELF header */ +#define E_PHOFF 28 +#define E_PHENTSIZE 42 +#define E_PHNUM 44 +#define E_HSIZE 52 /* size of ELF header */ + +#define EI_MAGIC 0 /* offsets in E_IDENT area */ +#define EI_CLASS 4 +#define EI_DATA 5 + +#define PH_TYPE 0 /* ELF program header */ +#define PH_OFFSET 4 +#define PH_FILESZ 16 +#define PH_HSIZE 32 /* size of program header */ + +#define PT_NOTE 4 /* Program header type = note */ + +#define ELFCLASS32 1 +#define ELFDATA2MSB 2 + +unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' }; + +int +main(int ac, char **av) +{ + int fd, n, i; + int ph, ps, np; + int nnote, nnote2, ns; + + if (ac != 2) { + fprintf(stderr, "Usage: %s elf-file\n", av[0]); + exit(1); + } + fd = open(av[1], O_RDWR); + if (fd < 0) { + perror(av[1]); + exit(1); + } + + nnote = 12 + ROUNDUP(strlen(arch) + 1) + sizeof(descr); + nnote2 = 12 + ROUNDUP(strlen(rpaname) + 1) + sizeof(rpanote); + + n = read(fd, buf, sizeof(buf)); + if (n < 0) { + perror("read"); + exit(1); + } + + if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0) + goto notelf; + + if (buf[E_IDENT+EI_CLASS] != ELFCLASS32 + || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) { + fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n", + av[1]); + exit(1); + } + + ph = GET_32BE(E_PHOFF); + ps = GET_16BE(E_PHENTSIZE); + np = GET_16BE(E_PHNUM); + if (ph < E_HSIZE || ps < PH_HSIZE || np < 1) + goto notelf; + if (ph + (np + 2) * ps + nnote + nnote2 > n) + goto nospace; + + for (i = 0; i < np; ++i) { + if (GET_32BE(ph + PH_TYPE) == PT_NOTE) { + fprintf(stderr, "%s already has a note entry\n", + av[1]); + exit(0); + } + ph += ps; + } + + /* XXX check that the area we want to use is all zeroes */ + for (i = 0; i < 2 * ps + nnote + nnote2; ++i) + if (buf[ph + i] != 0) + goto nospace; + + /* fill in the program header entry */ + ns = ph + 2 * ps; + PUT_32BE(ph + PH_TYPE, PT_NOTE); + PUT_32BE(ph + PH_OFFSET, ns); + PUT_32BE(ph + PH_FILESZ, nnote); + + /* fill in the note area we point to */ + /* XXX we should probably make this a proper section */ + PUT_32BE(ns, strlen(arch) + 1); + PUT_32BE(ns + 4, N_DESCR * 4); + PUT_32BE(ns + 8, 0x1275); + strcpy((char *) &buf[ns + 12], arch); + ns += 12 + strlen(arch) + 1; + for (i = 0; i < N_DESCR; ++i, ns += 4) + PUT_32BE(ns, descr[i]); + + /* fill in the second program header entry and the RPA note area */ + ph += ps; + PUT_32BE(ph + PH_TYPE, PT_NOTE); + PUT_32BE(ph + PH_OFFSET, ns); + PUT_32BE(ph + PH_FILESZ, nnote2); + + /* fill in the note area we point to */ + PUT_32BE(ns, strlen(rpaname) + 1); + PUT_32BE(ns + 4, sizeof(rpanote)); + PUT_32BE(ns + 8, 0x12759999); + strcpy((char *) &buf[ns + 12], rpaname); + ns += 12 + ROUNDUP(strlen(rpaname) + 1); + for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) + PUT_32BE(ns, rpanote[i]); + + /* Update the number of program headers */ + PUT_16BE(E_PHNUM, np + 2); + + /* write back */ + lseek(fd, (long) 0, SEEK_SET); + i = write(fd, buf, n); + if (i < 0) { + perror("write"); + exit(1); + } + if (i < n) { + fprintf(stderr, "%s: write truncated\n", av[1]); + exit(1); + } + + exit(0); + + notelf: + fprintf(stderr, "%s does not appear to be an ELF file\n", av[1]); + exit(1); + + nospace: + fprintf(stderr, "sorry, I can't find space in %s to put the note\n", + av[1]); + exit(1); +} diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S new file mode 100644 index 000000000000..d2f2ace56cd3 --- /dev/null +++ b/arch/powerpc/boot/crt0.S @@ -0,0 +1,58 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + +#include "ppc_asm.h" + + .text + .globl _zimage_start +_zimage_start: + bl 1f + +1: + mflr r0 + lis r9,1b@ha + addi r9,r9,1b@l + subf. r0,r9,r0 + beq 3f + + lis r9,__got2_start@ha + addi r9,r9,__got2_start@l + lis r8,__got2_end@ha + addi r8,r8,__got2_end@l + subf. r8,r9,r8 + beq 3f + srwi. r8,r8,2 + mtctr r8 + add r9,r0,r9 +2: + lwz r8,0(r9) + add r8,r8,r0 + stw r8,0(r9) + addi r9,r9,4 + bdnz 2b + +3: + lis r9,_start@h + add r9,r0,r9 + lis r8,_etext@ha + addi r8,r8,_etext@l + add r8,r0,r8 +4: dcbf r0,r9 + icbi r0,r9 + addi r9,r9,0x20 + cmplwi 0,r9,8 + blt 4b + sync + isync + + mr r6,r1 + b start + diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S new file mode 100644 index 000000000000..722f360a32a9 --- /dev/null +++ b/arch/powerpc/boot/div64.S @@ -0,0 +1,58 @@ +/* + * Divide a 64-bit unsigned number by a 32-bit unsigned number. + * This routine assumes that the top 32 bits of the dividend are + * non-zero to start with. + * On entry, r3 points to the dividend, which get overwritten with + * the 64-bit quotient, and r4 contains the divisor. + * On exit, r3 contains the remainder. + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "ppc_asm.h" + + .globl __div64_32 +__div64_32: + lwz r5,0(r3) # get the dividend into r5/r6 + lwz r6,4(r3) + cmplw r5,r4 + li r7,0 + li r8,0 + blt 1f + divwu r7,r5,r4 # if dividend.hi >= divisor, + mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor + subf. r5,r0,r5 # dividend.hi %= divisor + beq 3f +1: mr r11,r5 # here dividend.hi != 0 + andis. r0,r5,0xc000 + bne 2f + cntlzw r0,r5 # we are shifting the dividend right + li r10,-1 # to make it < 2^32, and shifting + srw r10,r10,r0 # the divisor right the same amount, + add r9,r4,r10 # rounding up (so the estimate cannot + andc r11,r6,r10 # ever be too large, only too small) + andc r9,r9,r10 + or r11,r5,r11 + rotlw r9,r9,r0 + rotlw r11,r11,r0 + divwu r11,r11,r9 # then we divide the shifted quantities +2: mullw r10,r11,r4 # to get an estimate of the quotient, + mulhwu r9,r11,r4 # multiply the estimate by the divisor, + subfc r6,r10,r6 # take the product from the divisor, + add r8,r8,r11 # and add the estimate to the accumulated + subfe. r5,r9,r5 # quotient + bne 1b +3: cmplw r6,r4 + blt 4f + divwu r0,r6,r4 # perform the remaining 32-bit division + mullw r10,r0,r4 # and get the remainder + add r8,r8,r0 + subf r6,r10,r6 +4: stw r7,0(r3) # return the quotient in *r3 + stw r8,4(r3) + mr r3,r6 # return the remainder in r3 + blr diff --git a/arch/powerpc/boot/elf.h b/arch/powerpc/boot/elf.h new file mode 100644 index 000000000000..d4828fcf1cb9 --- /dev/null +++ b/arch/powerpc/boot/elf.h @@ -0,0 +1,149 @@ +#ifndef _PPC_BOOT_ELF_H_ +#define _PPC_BOOT_ELF_H_ + +/* 32-bit ELF base types. */ +typedef unsigned int Elf32_Addr; +typedef unsigned short Elf32_Half; +typedef unsigned int Elf32_Off; +typedef signed int Elf32_Sword; +typedef unsigned int Elf32_Word; + +/* 64-bit ELF base types. */ +typedef unsigned long long Elf64_Addr; +typedef unsigned short Elf64_Half; +typedef signed short Elf64_SHalf; +typedef unsigned long long Elf64_Off; +typedef signed int Elf64_Sword; +typedef unsigned int Elf64_Word; +typedef unsigned long long Elf64_Xword; +typedef signed long long Elf64_Sxword; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME 0x6474e550 + +#define PT_GNU_STACK (PT_LOOS + 0x474e551) + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* These constants define the various ELF target machines */ +#define EM_NONE 0 +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC64 */ + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[16]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#endif /* _PPC_BOOT_ELF_H_ */ diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh new file mode 100644 index 000000000000..eacce9590816 --- /dev/null +++ b/arch/powerpc/boot/install.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# arch/ppc64/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Blatantly stolen from in arch/i386/boot/install.sh by Dave Hansen +# +# "make install" script for ppc64 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# $5 - kernel boot file, the zImage +# + +# User may have a custom install script + +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi + +# Default install + +# this should work for both the pSeries zImage and the iSeries vmlinux.sm +image_name=`basename $2` + +if [ -f $4/$image_name ]; then + mv $4/$image_name $4/$image_name.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/$image_name +cp $3 $4/System.map diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c new file mode 100644 index 000000000000..64ec93116fa6 --- /dev/null +++ b/arch/powerpc/boot/main.c @@ -0,0 +1,321 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <stdarg.h> +#include <stddef.h> +#include "elf.h" +#include "page.h" +#include "string.h" +#include "stdio.h" +#include "prom.h" +#include "zlib.h" + +extern void flush_cache(void *, unsigned long); + + +/* Value picked to match that used by yaboot */ +#define PROG_START 0x01400000 +#define RAM_END (512<<20) // Fixme: use OF */ +#define ONE_MB 0x100000 + +extern char _start[]; +extern char __bss_start[]; +extern char _end[]; +extern char _vmlinux_start[]; +extern char _vmlinux_end[]; +extern char _initrd_start[]; +extern char _initrd_end[]; + +struct addr_range { + unsigned long addr; + unsigned long size; + unsigned long memsize; +}; +static struct addr_range vmlinux; +static struct addr_range vmlinuz; +static struct addr_range initrd; + +static unsigned long elfoffset; + +static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ +static char elfheader[256]; + + +typedef void (*kernel_entry_t)( unsigned long, + unsigned long, + void *, + void *); + + +#undef DEBUG + +static unsigned long claim_base; + +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != Z_DEFLATED || (flags & RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if ((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + if (zlib_inflate_workspacesize() > sizeof(scratch)) { + printf("gunzip needs more mem\n"); + exit(); + } + memset(&s, 0, sizeof(s)); + s.workspace = scratch; + r = zlib_inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = zlib_inflate(&s, Z_FULL_FLUSH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + zlib_inflateEnd(&s); +} + +static unsigned long try_claim(unsigned long size) +{ + unsigned long addr = 0; + + for(; claim_base < RAM_END; claim_base += ONE_MB) { +#ifdef DEBUG + printf(" trying: 0x%08lx\n\r", claim_base); +#endif + addr = (unsigned long)claim(claim_base, size, 0); + if ((void *)addr != (void *)-1) + break; + } + if (addr == 0) + return 0; + claim_base = PAGE_ALIGN(claim_base + size); + return addr; +} + +static int is_elf64(void *hdr) +{ + Elf64_Ehdr *elf64 = hdr; + Elf64_Phdr *elf64ph; + unsigned int i; + + if (!(elf64->e_ident[EI_MAG0] == ELFMAG0 && + elf64->e_ident[EI_MAG1] == ELFMAG1 && + elf64->e_ident[EI_MAG2] == ELFMAG2 && + elf64->e_ident[EI_MAG3] == ELFMAG3 && + elf64->e_ident[EI_CLASS] == ELFCLASS64 && + elf64->e_ident[EI_DATA] == ELFDATA2MSB && + elf64->e_type == ET_EXEC && + elf64->e_machine == EM_PPC64)) + return 0; + + elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + for (i = 0; i < (unsigned int)elf64->e_phnum; i++, elf64ph++) + if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) + break; + if (i >= (unsigned int)elf64->e_phnum) + return 0; + + elfoffset = (unsigned long)elf64ph->p_offset; + vmlinux.size = (unsigned long)elf64ph->p_filesz + elfoffset; + vmlinux.memsize = (unsigned long)elf64ph->p_memsz + elfoffset; + return 1; +} + +static int is_elf32(void *hdr) +{ + Elf32_Ehdr *elf32 = hdr; + Elf32_Phdr *elf32ph; + unsigned int i; + + if (!(elf32->e_ident[EI_MAG0] == ELFMAG0 && + elf32->e_ident[EI_MAG1] == ELFMAG1 && + elf32->e_ident[EI_MAG2] == ELFMAG2 && + elf32->e_ident[EI_MAG3] == ELFMAG3 && + elf32->e_ident[EI_CLASS] == ELFCLASS32 && + elf32->e_ident[EI_DATA] == ELFDATA2MSB && + elf32->e_type == ET_EXEC && + elf32->e_machine == EM_PPC)) + return 0; + + elf32 = (Elf32_Ehdr *)elfheader; + elf32ph = (Elf32_Phdr *) ((unsigned long)elf32 + elf32->e_phoff); + for (i = 0; i < elf32->e_phnum; i++, elf32ph++) + if (elf32ph->p_type == PT_LOAD && elf32ph->p_offset != 0) + break; + if (i >= elf32->e_phnum) + return 0; + + elfoffset = elf32ph->p_offset; + vmlinux.size = elf32ph->p_filesz + elf32ph->p_offset; + vmlinux.memsize = elf32ph->p_memsz + elf32ph->p_offset; + return 1; +} + +void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) +{ + int len; + kernel_entry_t kernel_entry; + + memset(__bss_start, 0, _end - __bss_start); + + prom = (int (*)(void *)) promptr; + chosen_handle = finddevice("/chosen"); + if (chosen_handle == (void *) -1) + exit(); + if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) + exit(); + stderr = stdout; + if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) + exit(); + + printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp); + + vmlinuz.addr = (unsigned long)_vmlinux_start; + vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); + + /* gunzip the ELF header of the kernel */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + len = vmlinuz.size; + gunzip(elfheader, sizeof(elfheader), + (unsigned char *)vmlinuz.addr, &len); + } else + memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader)); + + if (!is_elf64(elfheader) && !is_elf32(elfheader)) { + printf("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); + exit(); + } + + /* + * The first available claim_base must be above the end of the + * the loaded kernel wrapper file (_start to _end includes the + * initrd image if it is present) and rounded up to a nice + * 1 MB boundary for good measure. + */ + + claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB); + +#if defined(PROG_START) + /* + * Maintain a "magic" minimum address. This keeps some older + * firmware platforms running. + */ + + if (claim_base < PROG_START) + claim_base = PROG_START; +#endif + + /* We need to claim the memsize plus the file offset since gzip + * will expand the header (file offset), then the kernel, then + * possible rubbish we don't care about. But the kernel bss must + * be claimed (it will be zero'd by the kernel itself) + */ + printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); + vmlinux.addr = try_claim(vmlinux.memsize); + if (vmlinux.addr == 0) { + printf("Can't allocate memory for kernel image !\n\r"); + exit(); + } + + /* + * Now we try to claim memory for the initrd (and copy it there) + */ + initrd.size = (unsigned long)(_initrd_end - _initrd_start); + initrd.memsize = initrd.size; + if ( initrd.size > 0 ) { + printf("Allocating 0x%lx bytes for initrd ...\n\r", initrd.size); + initrd.addr = try_claim(initrd.size); + if (initrd.addr == 0) { + printf("Can't allocate memory for initial ramdisk !\n\r"); + exit(); + } + a1 = initrd.addr; + a2 = initrd.size; + printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r", + initrd.addr, (unsigned long)_initrd_start, initrd.size); + memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size); + printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr)); + } + + /* Eventually gunzip the kernel */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", + vmlinux.addr, vmlinuz.addr, vmlinuz.addr+vmlinuz.size); + len = vmlinuz.size; + gunzip((void *)vmlinux.addr, vmlinux.memsize, + (unsigned char *)vmlinuz.addr, &len); + printf("done 0x%lx bytes\n\r", len); + } else { + memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); + } + + /* Skip over the ELF header */ +#ifdef DEBUG + printf("... skipping 0x%lx bytes of ELF header\n\r", + elfoffset); +#endif + vmlinux.addr += elfoffset; + + flush_cache((void *)vmlinux.addr, vmlinux.size); + + kernel_entry = (kernel_entry_t)vmlinux.addr; +#ifdef DEBUG + printf( "kernel:\n\r" + " entry addr = 0x%lx\n\r" + " a1 = 0x%lx,\n\r" + " a2 = 0x%lx,\n\r" + " prom = 0x%lx,\n\r" + " bi_recs = 0x%lx,\n\r", + (unsigned long)kernel_entry, a1, a2, + (unsigned long)prom, NULL); +#endif + + kernel_entry(a1, a2, prom, NULL); + + printf("Error: Linux kernel returned to zImage bootloader!\n\r"); + + exit(); +} + diff --git a/arch/powerpc/boot/page.h b/arch/powerpc/boot/page.h new file mode 100644 index 000000000000..14eca30fef64 --- /dev/null +++ b/arch/powerpc/boot/page.h @@ -0,0 +1,34 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h new file mode 100644 index 000000000000..1c2c2817f9b7 --- /dev/null +++ b/arch/powerpc/boot/ppc_asm.h @@ -0,0 +1,62 @@ +#ifndef _PPC64_PPC_ASM_H +#define _PPC64_PPC_ASM_H +/* + * + * Definitions used by various bits of low-level assembly code on PowerPC. + * + * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/powerpc/boot/prom.c b/arch/powerpc/boot/prom.c new file mode 100644 index 000000000000..4bea2f4dcb06 --- /dev/null +++ b/arch/powerpc/boot/prom.c @@ -0,0 +1,499 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <stdarg.h> +#include <stddef.h> +#include "string.h" +#include "stdio.h" +#include "prom.h" + +int (*prom)(void *); + +void *chosen_handle; + +void *stdin; +void *stdout; +void *stderr; + + +int +write(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "write"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +int +read(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "read"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +void +exit() +{ + struct prom_args { + char *service; + } args; + + for (;;) { + args.service = "exit"; + (*prom)(&args); + } +} + +void +pause(void) +{ + struct prom_args { + char *service; + } args; + + args.service = "enter"; + (*prom)(&args); +} + +void * +finddevice(const char *name) +{ + struct prom_args { + char *service; + int nargs; + int nret; + const char *devspec; + void *phandle; + } args; + + args.service = "finddevice"; + args.nargs = 1; + args.nret = 1; + args.devspec = name; + args.phandle = (void *) -1; + (*prom)(&args); + return args.phandle; +} + +void * +claim(unsigned long virt, unsigned long size, unsigned long align) +{ + struct prom_args { + char *service; + int nargs; + int nret; + unsigned int virt; + unsigned int size; + unsigned int align; + void *ret; + } args; + + args.service = "claim"; + args.nargs = 3; + args.nret = 1; + args.virt = virt; + args.size = size; + args.align = align; + (*prom)(&args); + return args.ret; +} + +int +getprop(void *phandle, const char *name, void *buf, int buflen) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *phandle; + const char *name; + void *buf; + int buflen; + int size; + } args; + + args.service = "getprop"; + args.nargs = 4; + args.nret = 1; + args.phandle = phandle; + args.name = name; + args.buf = buf; + args.buflen = buflen; + args.size = -1; + (*prom)(&args); + return args.size; +} + +int +putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + putc('\r', f); + return write(f, &ch, 1) == 1? c: -1; +} + +int +putchar(int c) +{ + return putc(c, stdout); +} + +int +fputs(char *str, void *f) +{ + int n = strlen(str); + + return write(f, str, n) == n? 0: -1; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +extern unsigned int __div64_32(unsigned long long *dividend, + unsigned int divisor); + +/* The unnecessary pointer compare is there + * to check for type safety (n must be 64bit) + */ +# define do_div(n,base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ + if (((n) >> 32) == 0) { \ + __rem = (unsigned int)(n) % __base; \ + (n) = (unsigned int)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = "<NULL>"; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +static char sprint_buf[1024]; + +int +printf(const char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); + return n; +} diff --git a/arch/powerpc/boot/prom.h b/arch/powerpc/boot/prom.h new file mode 100644 index 000000000000..96ab5aec740c --- /dev/null +++ b/arch/powerpc/boot/prom.h @@ -0,0 +1,18 @@ +#ifndef _PPC_BOOT_PROM_H_ +#define _PPC_BOOT_PROM_H_ + +extern int (*prom) (void *); +extern void *chosen_handle; + +extern void *stdin; +extern void *stdout; +extern void *stderr; + +extern int write(void *handle, void *ptr, int nb); +extern int read(void *handle, void *ptr, int nb); +extern void exit(void); +extern void pause(void); +extern void *finddevice(const char *); +extern void *claim(unsigned long virt, unsigned long size, unsigned long align); +extern int getprop(void *phandle, const char *name, void *buf, int buflen); +#endif /* _PPC_BOOT_PROM_H_ */ diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h new file mode 100644 index 000000000000..24bd3a8dee94 --- /dev/null +++ b/arch/powerpc/boot/stdio.h @@ -0,0 +1,16 @@ +#ifndef _PPC_BOOT_STDIO_H_ +#define _PPC_BOOT_STDIO_H_ + +extern int printf(const char *fmt, ...); + +extern int sprintf(char *buf, const char *fmt, ...); + +extern int vsprintf(char *buf, const char *fmt, va_list args); + +extern int putc(int c, void *f); +extern int putchar(int c); +extern int getchar(void); + +extern int fputs(char *str, void *f); + +#endif /* _PPC_BOOT_STDIO_H_ */ diff --git a/arch/powerpc/boot/string.S b/arch/powerpc/boot/string.S new file mode 100644 index 000000000000..b1eeaed7db17 --- /dev/null +++ b/arch/powerpc/boot/string.S @@ -0,0 +1,216 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + +#include "ppc_asm.h" + + .text + .globl strcpy +strcpy: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strncpy +strncpy: + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + + .globl strcat +strcat: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strcmp +strcmp: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + + .globl strlen +strlen: + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + + .globl memset +memset: + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + rlwinm r0,r5,32-2,2,31 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + + .globl memmove +memmove: + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + + .globl memcpy +memcpy: + rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl backwards_memcpy +backwards_memcpy: + rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl memcmp +memcmp: + cmpwi 0,r5,0 + blelr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr + + +/* + * Flush the dcache and invalidate the icache for a range of addresses. + * + * flush_cache(addr, len) + */ + .global flush_cache +flush_cache: + addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ + rlwinm. 4,4,27,5,31 + mtctr 4 + beqlr +1: dcbf 0,3 + icbi 0,3 + addi 3,3,0x20 + bdnz 1b + sync + isync + blr + diff --git a/arch/powerpc/boot/string.h b/arch/powerpc/boot/string.h new file mode 100644 index 000000000000..9fdff1cc0d70 --- /dev/null +++ b/arch/powerpc/boot/string.h @@ -0,0 +1,17 @@ +#ifndef _PPC_BOOT_STRING_H_ +#define _PPC_BOOT_STRING_H_ +#include <stddef.h> + +extern char *strcpy(char *dest, const char *src); +extern char *strncpy(char *dest, const char *src, size_t n); +extern char *strcat(char *dest, const char *src); +extern int strcmp(const char *s1, const char *s2); +extern size_t strlen(const char *s); +extern size_t strnlen(const char *s, size_t count); + +extern void *memset(void *s, int c, size_t n); +extern void *memmove(void *dest, const void *src, unsigned long n); +extern void *memcpy(void *dest, const void *src, unsigned long n); +extern int memcmp(const void *s1, const void *s2, size_t n); + +#endif /* _PPC_BOOT_STRING_H_ */ diff --git a/arch/powerpc/boot/zImage.lds b/arch/powerpc/boot/zImage.lds new file mode 100644 index 000000000000..4b6bb3ffe3dc --- /dev/null +++ b/arch/powerpc/boot/zImage.lds @@ -0,0 +1,46 @@ +OUTPUT_ARCH(powerpc:common) +ENTRY(_zimage_start) +SECTIONS +{ + . = (4*1024*1024); + _start = .; + .text : + { + *(.text) + *(.fixup) + } + _etext = .; + . = ALIGN(4096); + .data : + { + *(.rodata*) + *(.data*) + *(.sdata*) + __got2_start = .; + *(.got2) + __got2_end = .; + } + + . = ALIGN(4096); + _vmlinux_start = .; + .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) } + _vmlinux_end = .; + + . = ALIGN(4096); + _initrd_start = .; + .kernel:initrd : { *(.kernel:initrd) } + _initrd_end = .; + + . = ALIGN(4096); + _edata = .; + + . = ALIGN(4096); + __bss_start = .; + .bss : + { + *(.sbss) + *(.bss) + } + . = ALIGN(4096); + _end = . ; +} diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 67ffecbc05cb..b657f7e44762 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -1,18 +1,33 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14-rc4 -# Thu Oct 20 08:29:10 2005 +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:26 2005 # +CONFIG_PPC64=y CONFIG_64BIT=y +CONFIG_PPC_MERGE=y CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Processor support +# +# CONFIG_POWER4_ONLY is not set +CONFIG_POWER3=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +CONFIG_ALTIVEC=y +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=4 # # Code maturity level options @@ -38,6 +53,7 @@ CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set # CONFIG_CPUSETS is not set CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -66,31 +82,69 @@ CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODULE_SRCVERSION_ALL is not set # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y -CONFIG_SYSVIPC_COMPAT=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -# CONFIG_PPC_ISERIES is not set CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set # CONFIG_PPC_PSERIES is not set -CONFIG_PPC_BPA=y # CONFIG_PPC_PMAC is not set # CONFIG_PPC_MAPLE is not set -CONFIG_PPC=y -CONFIG_PPC64=y +CONFIG_PPC_CELL=y CONFIG_PPC_OF=y -CONFIG_BPA_IIC=y -CONFIG_ALTIVEC=y -CONFIG_KEXEC=y # CONFIG_U3_DART is not set -# CONFIG_BOOTX_TEXT is not set -# CONFIG_POWER4_ONLY is not set +CONFIG_PPC_RTAS=y +# CONFIG_RTAS_ERROR_LOGGING is not set +CONFIG_RTAS_PROC=y +CONFIG_RTAS_FLASH=y +CONFIG_MMIO_NVRAM=y +CONFIG_CELL_IIC=y +# CONFIG_PPC_MPC106 is not set +# CONFIG_GENERIC_TBSYNC is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_FORCE_MAX_ZONEORDER=13 # CONFIG_IOMMU_VMERGE is not set -CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +# CONFIG_NUMA is not set CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -98,30 +152,21 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_NUMA is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_GENERIC_HARDIRQS=y -CONFIG_PPC_RTAS=y -CONFIG_RTAS_PROC=y -CONFIG_RTAS_FLASH=y -CONFIG_SECCOMP=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y # -# Bus Options +# Bus options # +CONFIG_GENERIC_ISA_DMA=y +# CONFIG_PPC_I8259 is not set +# CONFIG_PPC_INDIRECT_PCI is not set CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LEGACY_PROC=y @@ -136,6 +181,7 @@ CONFIG_PCI_LEGACY_PROC=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set +CONFIG_KERNEL_START=0xc000000000000000 # # Networking @@ -183,6 +229,10 @@ CONFIG_INET6_TUNNEL=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# # CONFIG_NETFILTER_NETLINK is not set # @@ -284,6 +334,10 @@ CONFIG_IP_NF_ARP_MANGLE=m # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y @@ -345,14 +399,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set # @@ -442,6 +488,7 @@ CONFIG_IDEDMA_AUTO=y # # Macintosh device drivers # +# CONFIG_WINDFARM is not set # # Network device support @@ -495,7 +542,6 @@ CONFIG_SKGE=m # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set -# CONFIG_SPIDER_NET is not set # CONFIG_MV643XX_ETH is not set # @@ -625,7 +671,7 @@ CONFIG_WATCHDOG=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -CONFIG_WATCHDOG_RTAS=y +# CONFIG_WATCHDOG_RTAS is not set # # PCI-based Watchdog Cards @@ -633,6 +679,8 @@ CONFIG_WATCHDOG_RTAS=y # CONFIG_PCIPCWATCHDOG is not set # CONFIG_WDTPCI is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -649,6 +697,7 @@ CONFIG_WATCHDOG_RTAS=y # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -699,6 +748,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set +# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -757,6 +807,10 @@ CONFIG_USB_ARCH_HAS_OHCI=y # CONFIG_USB is not set # +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# # USB Gadget Support # # CONFIG_USB_GADGET is not set @@ -943,9 +997,24 @@ CONFIG_NLS_ISO8859_15=m # CONFIG_NLS_UTF8 is not set # -# Profiling support +# Library routines +# +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m + +# +# Instrumentation Support # # CONFIG_PROFILING is not set +# CONFIG_KPROBES is not set # # Kernel hacking @@ -962,13 +1031,14 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set # CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_KPROBES is not set # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_DEBUGGER=y # CONFIG_XMON is not set -# CONFIG_PPCDBG is not set CONFIG_IRQSTACKS=y +# CONFIG_BOOTX_TEXT is not set # # Security options @@ -1008,17 +1078,3 @@ CONFIG_CRYPTO_DEFLATE=m # # Hardware crypto devices # - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -CONFIG_CRC32=y -# CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=m -CONFIG_ZLIB_DEFLATE=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index e76854f8c121..3c22ccb18519 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14 -# Mon Nov 7 13:37:59 2005 +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:30 2005 # CONFIG_PPC64=y CONFIG_64BIT=y @@ -53,6 +53,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -83,6 +84,23 @@ CONFIG_KMOD=y CONFIG_STOP_MACHINE=y # +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# # Platform support # CONFIG_PPC_MULTIPLATFORM=y @@ -137,6 +155,7 @@ CONFIG_IRQ_ALL_CPUS=y # CONFIG_NUMA is not set CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -215,6 +234,10 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# # CONFIG_NETFILTER_NETLINK is not set # @@ -382,19 +405,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -CONFIG_DEFAULT_AS=y -# CONFIG_DEFAULT_DEADLINE is not set -# CONFIG_DEFAULT_CFQ is not set -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" # CONFIG_ATA_OVER_ETH is not set # @@ -656,7 +666,6 @@ CONFIG_SUNGEM=y # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set # CONFIG_NET_PCI is not set -# CONFIG_FEC_8XX is not set # # Ethernet (1000 Mbit) @@ -710,6 +719,7 @@ CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set CONFIG_PPPOE=m # CONFIG_SLIP is not set # CONFIG_NET_FC is not set @@ -804,6 +814,8 @@ CONFIG_LEGACY_PTY_COUNT=256 # # CONFIG_WATCHDOG is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -917,7 +929,6 @@ CONFIG_FB=y CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y @@ -932,6 +943,7 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set +# CONFIG_FB_S1D13XXX is not set CONFIG_FB_NVIDIA=y CONFIG_FB_NVIDIA_I2C=y # CONFIG_FB_RIVA is not set @@ -950,7 +962,6 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set -# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -959,6 +970,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_VGA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set # CONFIG_FONTS is not set CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y @@ -1192,6 +1204,7 @@ CONFIG_USB_MON=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y # CONFIG_USB_SERIAL_AIRPRIME is not set +# CONFIG_USB_SERIAL_ANYDATA is not set CONFIG_USB_SERIAL_BELKIN=m CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m # CONFIG_USB_SERIAL_CP2101 is not set @@ -1222,7 +1235,6 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_KLSI=m CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m -# CONFIG_USB_SERIAL_NOKIA_DKU2 is not set CONFIG_USB_SERIAL_PL2303=m # CONFIG_USB_SERIAL_HP4X is not set CONFIG_USB_SERIAL_SAFE=m @@ -1474,10 +1486,11 @@ CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m # -# Profiling support +# Instrumentation Support # CONFIG_PROFILING=y CONFIG_OPROFILE=y +# CONFIG_KPROBES is not set # # Kernel hacking @@ -1497,7 +1510,6 @@ CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_KPROBES is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUGGER is not set CONFIG_IRQSTACKS=y diff --git a/arch/powerpc/configs/iseries_defconfig b/arch/powerpc/configs/iseries_defconfig index 62e92c7e9e27..751a622fb7a7 100644 --- a/arch/powerpc/configs/iseries_defconfig +++ b/arch/powerpc/configs/iseries_defconfig @@ -1,18 +1,33 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14-rc4 -# Thu Oct 20 08:30:56 2005 +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:32 2005 # +CONFIG_PPC64=y CONFIG_64BIT=y +CONFIG_PPC_MERGE=y CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Processor support +# +# CONFIG_POWER4_ONLY is not set +CONFIG_POWER3=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +# CONFIG_ALTIVEC is not set +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=32 # # Code maturity level options @@ -40,6 +55,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set @@ -68,22 +84,60 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y CONFIG_STOP_MACHINE=y -CONFIG_SYSVIPC_COMPAT=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -CONFIG_PPC_ISERIES=y # CONFIG_PPC_MULTIPLATFORM is not set -CONFIG_PPC=y -CONFIG_PPC64=y +CONFIG_PPC_ISERIES=y +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set +# CONFIG_PPC_RTAS is not set +# CONFIG_MMIO_NVRAM is not set CONFIG_IBMVIO=y -# CONFIG_POWER4_ONLY is not set +# CONFIG_PPC_MPC106 is not set +# CONFIG_GENERIC_TBSYNC is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_PREEMPT_BKL is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_IOMMU_VMERGE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=32 +CONFIG_IRQ_ALL_CPUS=y +CONFIG_LPARCFG=y +# CONFIG_NUMA is not set CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -91,26 +145,20 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_NUMA is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PPC_64K_PAGES is not set # CONFIG_SCHED_SMT is not set -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -# CONFIG_PREEMPT_BKL is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_GENERIC_HARDIRQS=y -CONFIG_LPARCFG=y +CONFIG_PROC_DEVICETREE=y +# CONFIG_PM is not set CONFIG_SECCOMP=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_ISA_DMA_API=y # -# Bus Options +# Bus options # +CONFIG_GENERIC_ISA_DMA=y +# CONFIG_PPC_I8259 is not set +# CONFIG_PPC_INDIRECT_PCI is not set CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LEGACY_PROC=y @@ -125,6 +173,7 @@ CONFIG_PCI_LEGACY_PROC=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set +CONFIG_KERNEL_START=0xc000000000000000 # # Networking @@ -166,6 +215,10 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# # CONFIG_NETFILTER_NETLINK is not set # @@ -265,6 +318,10 @@ CONFIG_LLC=y # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y @@ -326,14 +383,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set # @@ -377,6 +426,7 @@ CONFIG_SCSI_FC_ATTRS=y # # SCSI low-level drivers # +# CONFIG_ISCSI_TCP is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -454,6 +504,7 @@ CONFIG_DM_ZERO=m # # Macintosh device drivers # +# CONFIG_WINDFARM is not set # # Network device support @@ -561,6 +612,7 @@ CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set CONFIG_PPPOE=m # CONFIG_SLIP is not set # CONFIG_NET_FC is not set @@ -643,6 +695,8 @@ CONFIG_LEGACY_PTY_COUNT=256 # # CONFIG_WATCHDOG is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -660,6 +714,7 @@ CONFIG_MAX_RAW_DEVS=256 # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -713,6 +768,10 @@ CONFIG_USB_ARCH_HAS_OHCI=y # CONFIG_USB is not set # +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# # USB Gadget Support # # CONFIG_USB_GADGET is not set @@ -917,10 +976,25 @@ CONFIG_VIOTAPE=m CONFIG_VIOPATH=y # -# Profiling support +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m + +# +# Instrumentation Support # CONFIG_PROFILING=y CONFIG_OPROFILE=y +# CONFIG_KPROBES is not set # # Kernel hacking @@ -937,11 +1011,11 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_KPROBES is not set CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUGGER is not set -# CONFIG_PPCDBG is not set CONFIG_IRQSTACKS=y # @@ -982,17 +1056,3 @@ CONFIG_CRYPTO_TEST=m # # Hardware crypto devices # - -# -# Library routines -# -CONFIG_CRC_CCITT=m -# CONFIG_CRC16 is not set -CONFIG_CRC32=y -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 7b480f3d1406..07b6d3d23360 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -1,18 +1,32 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14-rc4 -# Thu Oct 20 08:31:24 2005 +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:36 2005 # +CONFIG_PPC64=y CONFIG_64BIT=y +CONFIG_PPC_MERGE=y CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Processor support +# +CONFIG_POWER4_ONLY=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +# CONFIG_ALTIVEC is not set +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 # # Code maturity level options @@ -39,6 +53,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -67,32 +82,67 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y CONFIG_STOP_MACHINE=y -CONFIG_SYSVIPC_COMPAT=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -# CONFIG_PPC_ISERIES is not set CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set # CONFIG_PPC_PSERIES is not set -# CONFIG_PPC_BPA is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y -CONFIG_PPC=y -CONFIG_PPC64=y +# CONFIG_PPC_CELL is not set CONFIG_PPC_OF=y -CONFIG_MPIC=y -# CONFIG_ALTIVEC is not set -CONFIG_KEXEC=y CONFIG_U3_DART=y +CONFIG_MPIC=y +# CONFIG_PPC_RTAS is not set +# CONFIG_MMIO_NVRAM is not set CONFIG_MPIC_BROKEN_U3=y -CONFIG_BOOTX_TEXT=y -CONFIG_POWER4_ONLY=y +# CONFIG_PPC_MPC106 is not set +CONFIG_GENERIC_TBSYNC=y +# CONFIG_CPU_FREQ is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_PREEMPT_BKL is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_IOMMU_VMERGE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=2 +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +# CONFIG_NUMA is not set CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -100,27 +150,21 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_NUMA is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PPC_64K_PAGES is not set # CONFIG_SCHED_SMT is not set -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -# CONFIG_PREEMPT_BKL is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_GENERIC_HARDIRQS=y -CONFIG_SECCOMP=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y # -# Bus Options +# Bus options # +CONFIG_GENERIC_ISA_DMA=y +# CONFIG_PPC_I8259 is not set +# CONFIG_PPC_INDIRECT_PCI is not set CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LEGACY_PROC=y @@ -135,6 +179,7 @@ CONFIG_PCI_LEGACY_PROC=y # PCI Hotplug Support # # CONFIG_HOTPLUG_PCI is not set +CONFIG_KERNEL_START=0xc000000000000000 # # Networking @@ -193,8 +238,11 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set # # Network testing @@ -254,14 +302,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_BLK_DEV_INITRD is not set # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set # @@ -351,6 +391,7 @@ CONFIG_IDEDMA_AUTO=y # # Macintosh device drivers # +# CONFIG_WINDFARM is not set # # Network device support @@ -533,6 +574,8 @@ CONFIG_LEGACY_PTY_COUNT=256 # # CONFIG_WATCHDOG is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -549,6 +592,7 @@ CONFIG_LEGACY_PTY_COUNT=256 # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -599,6 +643,7 @@ CONFIG_I2C_AMD8111=y # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set +# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -681,12 +726,15 @@ CONFIG_USB_UHCI_HCD=y # # USB Device Class drivers # -# CONFIG_USB_BLUETOOTH_TTY is not set # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information # # CONFIG_USB_STORAGE is not set @@ -746,6 +794,7 @@ CONFIG_USB_SERIAL=y # CONFIG_USB_SERIAL_CONSOLE is not set CONFIG_USB_SERIAL_GENERIC=y # CONFIG_USB_SERIAL_AIRPRIME is not set +# CONFIG_USB_SERIAL_ANYDATA is not set # CONFIG_USB_SERIAL_BELKIN is not set # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set # CONFIG_USB_SERIAL_CP2101 is not set @@ -985,9 +1034,19 @@ CONFIG_NLS_DEFAULT="utf-8" CONFIG_NLS_UTF8=y # -# Profiling support +# Library routines +# +CONFIG_CRC_CCITT=y +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y + +# +# Instrumentation Support # # CONFIG_PROFILING is not set +# CONFIG_KPROBES is not set # # Kernel hacking @@ -1004,14 +1063,15 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_KPROBES is not set CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUGGER=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -# CONFIG_PPCDBG is not set # CONFIG_IRQSTACKS is not set +CONFIG_BOOTX_TEXT=y # # Security options @@ -1051,12 +1111,3 @@ CONFIG_CRYPTO_DES=y # # Hardware crypto devices # - -# -# Library routines -# -CONFIG_CRC_CCITT=y -# CONFIG_CRC16 is not set -CONFIG_CRC32=y -# CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig new file mode 100644 index 000000000000..509399eab6f5 --- /dev/null +++ b/arch/powerpc/configs/ppc64_defconfig @@ -0,0 +1,1580 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:38 2005 +# +CONFIG_PPC64=y +CONFIG_64BIT=y +CONFIG_PPC_MERGE=y +CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_PPC=y +CONFIG_EARLY_PRINTK=y +CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y + +# +# Processor support +# +# CONFIG_POWER4_ONLY is not set +CONFIG_POWER3=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +CONFIG_ALTIVEC=y +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=32 + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CPUSETS=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Platform support +# +CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set +CONFIG_PPC_PSERIES=y +CONFIG_PPC_PMAC=y +CONFIG_PPC_PMAC64=y +CONFIG_PPC_MAPLE=y +# CONFIG_PPC_CELL is not set +CONFIG_PPC_OF=y +CONFIG_XICS=y +CONFIG_U3_DART=y +CONFIG_MPIC=y +CONFIG_PPC_RTAS=y +CONFIG_RTAS_ERROR_LOGGING=y +CONFIG_RTAS_PROC=y +CONFIG_RTAS_FLASH=m +# CONFIG_MMIO_NVRAM is not set +CONFIG_MPIC_BROKEN_U3=y +CONFIG_IBMVIO=y +# CONFIG_PPC_MPC106 is not set +CONFIG_GENERIC_TBSYNC=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +# CONFIG_CPU_FREQ_DEBUG is not set +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_PMAC64=y +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_PREEMPT_BKL is not set +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_FORCE_MAX_ZONEORDER=13 +CONFIG_IOMMU_VMERGE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_PPC_SPLPAR=y +CONFIG_EEH=y +CONFIG_SCANLOG=m +CONFIG_LPARCFG=y +# CONFIG_NUMA is not set +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +# CONFIG_MEMORY_HOTPLUG is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PPC_64K_PAGES is not set +# CONFIG_SCHED_SMT is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y + +# +# Bus options +# +CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC_I8259=y +# CONFIG_PPC_INDIRECT_PCI is not set +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set +CONFIG_HOTPLUG_PCI_RPA=m +CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_KERNEL_START=0xc000000000000000 + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=y +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_TUNNEL=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_CT_ACCT=y +CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m +CONFIG_IP_NF_CT_PROTO_SCTP=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_MATCH_REALM=m +CONFIG_IP_NF_MATCH_SCTP=m +CONFIG_IP_NF_MATCH_DCCP=m +CONFIG_IP_NF_MATCH_COMMENT=m +CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNBYTES=m +CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_MATCH_STRING=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_TARGET_NFQUEUE=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CONNMARK=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_NOTRACK=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +CONFIG_LLC=y +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +CONFIG_NET_CLS_ROUTE=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_SL82C105 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +CONFIG_BLK_DEV_IDE_PMAC=y +CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y +CONFIG_BLK_DEV_IDEDMA_PMAC=y +# CONFIG_BLK_DEV_IDE_PMAC_BLINK is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=m +# CONFIG_SCSI_SAS_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +CONFIG_SCSI_SATA=y +# CONFIG_SCSI_SATA_AHCI is not set +CONFIG_SCSI_SATA_SVW=y +# CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set +# CONFIG_SCSI_SATA_NV is not set +# CONFIG_SCSI_PDC_ADMA is not set +# CONFIG_SCSI_SATA_QSTOR is not set +# CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_SX4 is not set +# CONFIG_SCSI_SATA_SIL is not set +# CONFIG_SCSI_SATA_SIL24 is not set +# CONFIG_SCSI_SATA_SIS is not set +# CONFIG_SCSI_SATA_ULI is not set +# CONFIG_SCSI_SATA_VIA is not set +# CONFIG_SCSI_SATA_VITESSE is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +CONFIG_SCSI_IBMVSCSI=y +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_IPR=y +CONFIG_SCSI_IPR_TRACE=y +CONFIG_SCSI_IPR_DUMP=y +# CONFIG_SCSI_QLOGIC_FC is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +CONFIG_SCSI_QLA2XXX=y +CONFIG_SCSI_QLA21XX=m +CONFIG_SCSI_QLA22XX=m +CONFIG_SCSI_QLA2300=m +CONFIG_SCSI_QLA2322=m +CONFIG_SCSI_QLA6312=m +CONFIG_SCSI_QLA24XX=m +CONFIG_SCSI_LPFC=m +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +CONFIG_SCSI_DEBUG=m + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +CONFIG_MD_RAID10=y +CONFIG_MD_RAID5=y +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_EMC=m + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set + +# +# IEEE 1394 (FireWire) support +# +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set +CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y +CONFIG_IEEE1394_CONFIG_ROM_IP1394=y +# CONFIG_IEEE1394_EXPORT_FULL_API is not set + +# +# Device Drivers +# +# CONFIG_IEEE1394_PCILYNX is not set +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=y +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Macintosh device drivers +# +CONFIG_ADB_PMU=y +CONFIG_PMAC_SMU=y +CONFIG_THERM_PM72=y +CONFIG_WINDFARM=y +CONFIG_WINDFARM_PM81=y +CONFIG_WINDFARM_PM91=y + +# +# Network device support +# +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_BONDING=m +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# PHY device support +# +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +CONFIG_SUNGEM=y +# CONFIG_CASSINI is not set +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_IBMVETH=m +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=y +CONFIG_ACENIC_OMIT_TIGON_I=y +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y +# CONFIG_BNX2 is not set +# CONFIG_MV643XX_ETH is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +CONFIG_IXGB=m +# CONFIG_IXGB_NAPI is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMOL=y +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set +CONFIG_PPPOE=m +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y +CONFIG_NETPOLL_RX=y +CONFIG_NETPOLL_TRAP=y +CONFIG_NET_POLL_CONTROLLER=y + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PCSPKR=m +# CONFIG_INPUT_UINPUT is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_PMACZILOG is not set +CONFIG_SERIAL_ICOM=m +CONFIG_SERIAL_JSM=m +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_HVC_CONSOLE=y +CONFIG_HVCS=m + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +CONFIG_I2C_AMD8111=y +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +CONFIG_I2C_KEYWEST=y +CONFIG_I2C_PMAC_SMU=y +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_RTC_X1205_I2C is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# + +# +# Multimedia Capabilities Port drivers +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +CONFIG_FB=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_MACMODES=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_OF=y +# CONFIG_FB_CONTROL is not set +# CONFIG_FB_PLATINUM is not set +# CONFIG_FB_VALKYRIE is not set +# CONFIG_FB_CT65550 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +CONFIG_FB_MATROX_I2C=m +CONFIG_FB_MATROX_MAVEN=m +CONFIG_FB_MATROX_MULTIHEAD=y +# CONFIG_FB_RADEON_OLD is not set +CONFIG_FB_RADEON=y +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_CYBLA is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_DEVICE=y +CONFIG_LCD_CLASS_DEVICE=y +CONFIG_LCD_DEVICE=y + +# +# Sound +# +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_GENERIC_DRIVER=y + +# +# Generic devices +# +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_YMFPCI is not set +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_FM801 is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_HDA_INTEL is not set + +# +# ALSA PowerMac devices +# +CONFIG_SND_POWERMAC=m +CONFIG_SND_POWERMAC_AUTO_DRC=y + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set + +# +# Open Sound System +# +# CONFIG_SOUND_PRIME is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +CONFIG_USB_HIDDEV=y +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_ACECAD is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_ITMTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set +# CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +# CONFIG_USB_MON is not set + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=m +# CONFIG_INFINIBAND_USER_MAD is not set +# CONFIG_INFINIBAND_USER_ACCESS is not set +CONFIG_INFINIBAND_MTHCA=m +# CONFIG_INFINIBAND_MTHCA_DEBUG is not set +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_IPOIB_DEBUG is not set +# CONFIG_INFINIBAND_SRP is not set + +# +# SN Devices +# + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=y +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=m +CONFIG_XFS_EXPORT=y +# CONFIG_XFS_QUOTA is not set +CONFIG_XFS_SECURITY=y +CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=y +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=y +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=m +CONFIG_NFSD_V2_ACL=y +CONFIG_NFSD_V3=y +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_NFS_ACL_SUPPORT=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +CONFIG_RPCSEC_GSS_SPKM3=m +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_EFI_PARTITION is not set + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m + +# +# Instrumentation Support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +# CONFIG_KPROBES is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUGGER=y +CONFIG_XMON=y +# CONFIG_XMON_DEFAULT is not set +CONFIG_IRQSTACKS=y +CONFIG_BOOTX_TEXT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_CRC32C=m +CONFIG_CRYPTO_TEST=m + +# +# Hardware crypto devices +# diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 9f09dff9e11a..a50ce0fa9243 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1,18 +1,33 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14-rc4 -# Thu Oct 20 08:32:17 2005 +# Linux kernel version: 2.6.15-rc5 +# Tue Dec 20 15:59:40 2005 # +CONFIG_PPC64=y CONFIG_64BIT=y +CONFIG_PPC_MERGE=y CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Processor support +# +# CONFIG_POWER4_ONLY is not set +CONFIG_POWER3=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +CONFIG_ALTIVEC=y +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=128 # # Code maturity level options @@ -40,6 +55,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -68,75 +84,102 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y CONFIG_STOP_MACHINE=y -CONFIG_SYSVIPC_COMPAT=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -# CONFIG_PPC_ISERIES is not set CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set CONFIG_PPC_PSERIES=y -# CONFIG_PPC_BPA is not set # CONFIG_PPC_PMAC is not set # CONFIG_PPC_MAPLE is not set -CONFIG_PPC=y -CONFIG_PPC64=y +# CONFIG_PPC_CELL is not set CONFIG_PPC_OF=y CONFIG_XICS=y +# CONFIG_U3_DART is not set CONFIG_MPIC=y -CONFIG_ALTIVEC=y -CONFIG_PPC_SPLPAR=y -CONFIG_KEXEC=y +CONFIG_PPC_RTAS=y +CONFIG_RTAS_ERROR_LOGGING=y +CONFIG_RTAS_PROC=y +CONFIG_RTAS_FLASH=m +# CONFIG_MMIO_NVRAM is not set CONFIG_IBMVIO=y -# CONFIG_U3_DART is not set -# CONFIG_BOOTX_TEXT is not set -# CONFIG_POWER4_ONLY is not set +# CONFIG_PPC_MPC106 is not set +# CONFIG_GENERIC_TBSYNC is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_PREEMPT_BKL is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_IOMMU_VMERGE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=128 +CONFIG_HOTPLUG_CPU=y +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_PPC_SPLPAR=y +CONFIG_EEH=y +CONFIG_SCANLOG=m +CONFIG_LPARCFG=y +CONFIG_NUMA=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_SELECT_MEMORY_MODEL=y # CONFIG_FLATMEM_MANUAL is not set -CONFIG_DISCONTIGMEM_MANUAL=y -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_DISCONTIGMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +# CONFIG_MEMORY_HOTPLUG is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y -CONFIG_NODES_SPAN_OTHER_NODES=y -CONFIG_NUMA=y +# CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -# CONFIG_PREEMPT_BKL is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_EEH=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_PPC_RTAS=y -CONFIG_RTAS_PROC=y -CONFIG_RTAS_FLASH=m -CONFIG_SCANLOG=m -CONFIG_LPARCFG=y -CONFIG_SECCOMP=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -CONFIG_HOTPLUG_CPU=y CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y # -# Bus Options +# Bus options # +CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC_I8259=y +# CONFIG_PPC_INDIRECT_PCI is not set CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LEGACY_PROC=y @@ -156,6 +199,7 @@ CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_KERNEL_START=0xc000000000000000 # # Networking @@ -197,6 +241,10 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# CONFIG_NETFILTER_NETLINK=y CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m @@ -299,6 +347,10 @@ CONFIG_LLC=y # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y @@ -368,14 +420,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set # @@ -473,6 +517,7 @@ CONFIG_SCSI_ISCSI_ATTRS=m # # SCSI low-level drivers # +# CONFIG_ISCSI_TCP is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -559,6 +604,7 @@ CONFIG_DM_MULTIPATH_EMC=m # # Macintosh device drivers # +# CONFIG_WINDFARM is not set # # Network device support @@ -645,7 +691,6 @@ CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set -# CONFIG_2BUFF_MODE is not set # # Token Ring devices @@ -674,6 +719,7 @@ CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set CONFIG_PPPOE=m # CONFIG_SLIP is not set # CONFIG_NET_FC is not set @@ -784,6 +830,8 @@ CONFIG_HVCS=m # # CONFIG_WATCHDOG is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -801,6 +849,7 @@ CONFIG_MAX_RAW_DEVS=1024 # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -852,6 +901,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set +# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -893,7 +943,6 @@ CONFIG_FB=y CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y @@ -905,6 +954,7 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_NVIDIA is not set # CONFIG_FB_RIVA is not set CONFIG_FB_MATROX=y @@ -927,7 +977,6 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set -# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -936,6 +985,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_VGA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set # CONFIG_FONTS is not set CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y @@ -990,12 +1040,15 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y # # USB Device Class drivers # -# CONFIG_USB_BLUETOOTH_TTY is not set # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set @@ -1106,6 +1159,7 @@ CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m # CONFIG_INFINIBAND_IPOIB_DEBUG is not set +# CONFIG_INFINIBAND_SRP is not set # # SN Devices @@ -1288,10 +1342,25 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_UTF8 is not set # -# Profiling support +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m + +# +# Instrumentation Support # CONFIG_PROFILING=y CONFIG_OPROFILE=y +# CONFIG_KPROBES is not set # # Kernel hacking @@ -1308,14 +1377,15 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_KPROBES is not set CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUGGER=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -# CONFIG_PPCDBG is not set CONFIG_IRQSTACKS=y +# CONFIG_BOOTX_TEXT is not set # # Security options @@ -1355,17 +1425,3 @@ CONFIG_CRYPTO_TEST=m # # Hardware crypto devices # - -# -# Library routines -# -CONFIG_CRC_CCITT=m -# CONFIG_CRC16 is not set -CONFIG_CRC32=y -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b3ae2993efb8..9ed551b6c172 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -4,6 +4,7 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc +CFLAGS_ioctl32.o += -Ifs/ endif ifeq ($(CONFIG_PPC32),y) CFLAGS_prom_init.o += -fPIC @@ -11,17 +12,30 @@ CFLAGS_btext.o += -fPIC endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ - signal_32.o pmc.o + irq.o align.o signal_32.o pmc.o vdso.o +obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ - signal_64.o ptrace32.o systbl.o + signal_64.o ptrace32.o systbl.o \ + paca.o ioctl32.o cpu_setup_power4.o \ + firmware.o sysfs.o udbg.o idle_64.o +obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o -obj-$(CONFIG_PPC_RTAS) += rtas.o +procfs-$(CONFIG_PPC64) := proc_ppc64.o +obj-$(CONFIG_PROC_FS) += $(procfs-y) +rtaspci-$(CONFIG_PPC64) := rtas_pci.o +obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y) obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o +obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o +obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o +obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o +udbgscc-$(CONFIG_PPC64) := udbg_scc.o +obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y) +obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o ifeq ($(CONFIG_PPC_MERGE),y) @@ -36,12 +50,23 @@ extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ prom.o traps.o setup-common.o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o -obj-$(CONFIG_PPC64) += misc_64.o +obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_KPROBES) += kprobes.o + +module-$(CONFIG_PPC64) += module_64.o +obj-$(CONFIG_MODULES) += $(module-y) + +pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \ + pci_direct_iommu.o iomap.o +obj-$(CONFIG_PCI) += $(pci64-y) + +kexec64-$(CONFIG_PPC64) += machine_kexec_64.o +obj-$(CONFIG_KEXEC) += $(kexec64-y) ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s @@ -49,13 +74,12 @@ AFLAGS_head_64.o += -I$(obj) endif else -# stuff used from here for ARCH=ppc or ARCH=ppc64 +# stuff used from here for ARCH=ppc smpobj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \ - setup-common.o $(smpobj-y) - endif +obj-$(CONFIG_PPC64) += $(obj64-y) + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c new file mode 100644 index 000000000000..faaec9c6f78f --- /dev/null +++ b/arch/powerpc/kernel/align.c @@ -0,0 +1,530 @@ +/* align.c - handle alignment exceptions for the Power PC. + * + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright (c) 2001-2002 PPC64 team, IBM Corp + * 64-bit and Power4 support + * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * Merge ppc32 and ppc64 implementations + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/cache.h> +#include <asm/cputable.h> + +struct aligninfo { + unsigned char len; + unsigned char flags; +}; + +#define IS_XFORM(inst) (((inst) >> 26) == 31) +#define IS_DSFORM(inst) (((inst) >> 26) >= 56) + +#define INVALID { 0, 0 } + +#define LD 1 /* load */ +#define ST 2 /* store */ +#define SE 4 /* sign-extend value */ +#define F 8 /* to/from fp regs */ +#define U 0x10 /* update index register */ +#define M 0x20 /* multiple load/store */ +#define SW 0x40 /* byte swap int or ... */ +#define S 0x40 /* ... single-precision fp */ +#define SX 0x40 /* byte count in XER */ +#define HARD 0x80 /* string, stwcx. */ + +#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ + +#define SWAP(a, b) (t = (a), (a) = (b), (b) = t) + +/* + * The PowerPC stores certain bits of the instruction that caused the + * alignment exception in the DSISR register. This array maps those + * bits to information about the operand length and what the + * instruction would do. + */ +static struct aligninfo aligninfo[128] = { + { 4, LD }, /* 00 0 0000: lwz / lwarx */ + INVALID, /* 00 0 0001 */ + { 4, ST }, /* 00 0 0010: stw */ + INVALID, /* 00 0 0011 */ + { 2, LD }, /* 00 0 0100: lhz */ + { 2, LD+SE }, /* 00 0 0101: lha */ + { 2, ST }, /* 00 0 0110: sth */ + { 4, LD+M }, /* 00 0 0111: lmw */ + { 4, LD+F+S }, /* 00 0 1000: lfs */ + { 8, LD+F }, /* 00 0 1001: lfd */ + { 4, ST+F+S }, /* 00 0 1010: stfs */ + { 8, ST+F }, /* 00 0 1011: stfd */ + INVALID, /* 00 0 1100 */ + { 8, LD }, /* 00 0 1101: ld/ldu/lwa */ + INVALID, /* 00 0 1110 */ + { 8, ST }, /* 00 0 1111: std/stdu */ + { 4, LD+U }, /* 00 1 0000: lwzu */ + INVALID, /* 00 1 0001 */ + { 4, ST+U }, /* 00 1 0010: stwu */ + INVALID, /* 00 1 0011 */ + { 2, LD+U }, /* 00 1 0100: lhzu */ + { 2, LD+SE+U }, /* 00 1 0101: lhau */ + { 2, ST+U }, /* 00 1 0110: sthu */ + { 4, ST+M }, /* 00 1 0111: stmw */ + { 4, LD+F+S+U }, /* 00 1 1000: lfsu */ + { 8, LD+F+U }, /* 00 1 1001: lfdu */ + { 4, ST+F+S+U }, /* 00 1 1010: stfsu */ + { 8, ST+F+U }, /* 00 1 1011: stfdu */ + INVALID, /* 00 1 1100 */ + INVALID, /* 00 1 1101 */ + INVALID, /* 00 1 1110 */ + INVALID, /* 00 1 1111 */ + { 8, LD }, /* 01 0 0000: ldx */ + INVALID, /* 01 0 0001 */ + { 8, ST }, /* 01 0 0010: stdx */ + INVALID, /* 01 0 0011 */ + INVALID, /* 01 0 0100 */ + { 4, LD+SE }, /* 01 0 0101: lwax */ + INVALID, /* 01 0 0110 */ + INVALID, /* 01 0 0111 */ + { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */ + { 4, LD+M+HARD }, /* 01 0 1001: lswi */ + { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */ + { 4, ST+M+HARD }, /* 01 0 1011: stswi */ + INVALID, /* 01 0 1100 */ + { 8, LD+U }, /* 01 0 1101: ldu */ + INVALID, /* 01 0 1110 */ + { 8, ST+U }, /* 01 0 1111: stdu */ + { 8, LD+U }, /* 01 1 0000: ldux */ + INVALID, /* 01 1 0001 */ + { 8, ST+U }, /* 01 1 0010: stdux */ + INVALID, /* 01 1 0011 */ + INVALID, /* 01 1 0100 */ + { 4, LD+SE+U }, /* 01 1 0101: lwaux */ + INVALID, /* 01 1 0110 */ + INVALID, /* 01 1 0111 */ + INVALID, /* 01 1 1000 */ + INVALID, /* 01 1 1001 */ + INVALID, /* 01 1 1010 */ + INVALID, /* 01 1 1011 */ + INVALID, /* 01 1 1100 */ + INVALID, /* 01 1 1101 */ + INVALID, /* 01 1 1110 */ + INVALID, /* 01 1 1111 */ + INVALID, /* 10 0 0000 */ + INVALID, /* 10 0 0001 */ + INVALID, /* 10 0 0010: stwcx. */ + INVALID, /* 10 0 0011 */ + INVALID, /* 10 0 0100 */ + INVALID, /* 10 0 0101 */ + INVALID, /* 10 0 0110 */ + INVALID, /* 10 0 0111 */ + { 4, LD+SW }, /* 10 0 1000: lwbrx */ + INVALID, /* 10 0 1001 */ + { 4, ST+SW }, /* 10 0 1010: stwbrx */ + INVALID, /* 10 0 1011 */ + { 2, LD+SW }, /* 10 0 1100: lhbrx */ + { 4, LD+SE }, /* 10 0 1101 lwa */ + { 2, ST+SW }, /* 10 0 1110: sthbrx */ + INVALID, /* 10 0 1111 */ + INVALID, /* 10 1 0000 */ + INVALID, /* 10 1 0001 */ + INVALID, /* 10 1 0010 */ + INVALID, /* 10 1 0011 */ + INVALID, /* 10 1 0100 */ + INVALID, /* 10 1 0101 */ + INVALID, /* 10 1 0110 */ + INVALID, /* 10 1 0111 */ + INVALID, /* 10 1 1000 */ + INVALID, /* 10 1 1001 */ + INVALID, /* 10 1 1010 */ + INVALID, /* 10 1 1011 */ + INVALID, /* 10 1 1100 */ + INVALID, /* 10 1 1101 */ + INVALID, /* 10 1 1110 */ + { 0, ST+HARD }, /* 10 1 1111: dcbz */ + { 4, LD }, /* 11 0 0000: lwzx */ + INVALID, /* 11 0 0001 */ + { 4, ST }, /* 11 0 0010: stwx */ + INVALID, /* 11 0 0011 */ + { 2, LD }, /* 11 0 0100: lhzx */ + { 2, LD+SE }, /* 11 0 0101: lhax */ + { 2, ST }, /* 11 0 0110: sthx */ + INVALID, /* 11 0 0111 */ + { 4, LD+F+S }, /* 11 0 1000: lfsx */ + { 8, LD+F }, /* 11 0 1001: lfdx */ + { 4, ST+F+S }, /* 11 0 1010: stfsx */ + { 8, ST+F }, /* 11 0 1011: stfdx */ + INVALID, /* 11 0 1100 */ + { 8, LD+M }, /* 11 0 1101: lmd */ + INVALID, /* 11 0 1110 */ + { 8, ST+M }, /* 11 0 1111: stmd */ + { 4, LD+U }, /* 11 1 0000: lwzux */ + INVALID, /* 11 1 0001 */ + { 4, ST+U }, /* 11 1 0010: stwux */ + INVALID, /* 11 1 0011 */ + { 2, LD+U }, /* 11 1 0100: lhzux */ + { 2, LD+SE+U }, /* 11 1 0101: lhaux */ + { 2, ST+U }, /* 11 1 0110: sthux */ + INVALID, /* 11 1 0111 */ + { 4, LD+F+S+U }, /* 11 1 1000: lfsux */ + { 8, LD+F+U }, /* 11 1 1001: lfdux */ + { 4, ST+F+S+U }, /* 11 1 1010: stfsux */ + { 8, ST+F+U }, /* 11 1 1011: stfdux */ + INVALID, /* 11 1 1100 */ + INVALID, /* 11 1 1101 */ + INVALID, /* 11 1 1110 */ + INVALID, /* 11 1 1111 */ +}; + +/* + * Create a DSISR value from the instruction + */ +static inline unsigned make_dsisr(unsigned instr) +{ + unsigned dsisr; + + + /* bits 6:15 --> 22:31 */ + dsisr = (instr & 0x03ff0000) >> 16; + + if (IS_XFORM(instr)) { + /* bits 29:30 --> 15:16 */ + dsisr |= (instr & 0x00000006) << 14; + /* bit 25 --> 17 */ + dsisr |= (instr & 0x00000040) << 8; + /* bits 21:24 --> 18:21 */ + dsisr |= (instr & 0x00000780) << 3; + } else { + /* bit 5 --> 17 */ + dsisr |= (instr & 0x04000000) >> 12; + /* bits 1: 4 --> 18:21 */ + dsisr |= (instr & 0x78000000) >> 17; + /* bits 30:31 --> 12:13 */ + if (IS_DSFORM(instr)) + dsisr |= (instr & 0x00000003) << 18; + } + + return dsisr; +} + +/* + * The dcbz (data cache block zero) instruction + * gives an alignment fault if used on non-cacheable + * memory. We handle the fault mainly for the + * case when we are running with the cache disabled + * for debugging. + */ +static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) +{ + long __user *p; + int i, size; + +#ifdef __powerpc64__ + size = ppc64_caches.dline_size; +#else + size = L1_CACHE_BYTES; +#endif + p = (long __user *) (regs->dar & -size); + if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size)) + return -EFAULT; + for (i = 0; i < size / sizeof(long); ++i) + if (__put_user(0, p+i)) + return -EFAULT; + return 1; +} + +/* + * Emulate load & store multiple instructions + * On 64-bit machines, these instructions only affect/use the + * bottom 4 bytes of each register, and the loads clear the + * top 4 bytes of the affected register. + */ +#ifdef CONFIG_PPC64 +#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4) +#else +#define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) +#endif + +static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int nb, + unsigned int flags, unsigned int instr) +{ + unsigned long *rptr; + unsigned int nb0, i; + + /* + * We do not try to emulate 8 bytes multiple as they aren't really + * available in our operating environments and we don't try to + * emulate multiples operations in kernel land as they should never + * be used/generated there at least not on unaligned boundaries + */ + if (unlikely((nb > 4) || !user_mode(regs))) + return 0; + + /* lmw, stmw, lswi/x, stswi/x */ + nb0 = 0; + if (flags & HARD) { + if (flags & SX) { + nb = regs->xer & 127; + if (nb == 0) + return 1; + } else { + if (__get_user(instr, + (unsigned int __user *)regs->nip)) + return -EFAULT; + nb = (instr >> 11) & 0x1f; + if (nb == 0) + nb = 32; + } + if (nb + reg * 4 > 128) { + nb0 = nb + reg * 4 - 128; + nb = 128 - reg * 4; + } + } else { + /* lwm, stmw */ + nb = (32 - reg) * 4; + } + + if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0)) + return -EFAULT; /* bad address */ + + rptr = ®s->gpr[reg]; + if (flags & LD) { + /* + * This zeroes the top 4 bytes of the affected registers + * in 64-bit mode, and also zeroes out any remaining + * bytes of the last register for lsw*. + */ + memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long)); + if (nb0 > 0) + memset(®s->gpr[0], 0, + ((nb0 + 3) / 4) * sizeof(unsigned long)); + + for (i = 0; i < nb; ++i) + if (__get_user(REG_BYTE(rptr, i), addr + i)) + return -EFAULT; + if (nb0 > 0) { + rptr = ®s->gpr[0]; + addr += nb; + for (i = 0; i < nb0; ++i) + if (__get_user(REG_BYTE(rptr, i), addr + i)) + return -EFAULT; + } + + } else { + for (i = 0; i < nb; ++i) + if (__put_user(REG_BYTE(rptr, i), addr + i)) + return -EFAULT; + if (nb0 > 0) { + rptr = ®s->gpr[0]; + addr += nb; + for (i = 0; i < nb0; ++i) + if (__put_user(REG_BYTE(rptr, i), addr + i)) + return -EFAULT; + } + } + return 1; +} + + +/* + * Called on alignment exception. Attempts to fixup + * + * Return 1 on success + * Return 0 if unable to handle the interrupt + * Return -EFAULT if data address is bad + */ + +int fix_alignment(struct pt_regs *regs) +{ + unsigned int instr, nb, flags; + unsigned int reg, areg; + unsigned int dsisr; + unsigned char __user *addr; + unsigned char __user *p; + int ret, t; + union { + u64 ll; + double dd; + unsigned char v[8]; + struct { + unsigned hi32; + int low32; + } x32; + struct { + unsigned char hi48[6]; + short low16; + } x16; + } data; + + /* + * We require a complete register set, if not, then our assembly + * is broken + */ + CHECK_FULL_REGS(regs); + + dsisr = regs->dsisr; + + /* Some processors don't provide us with a DSISR we can use here, + * let's make one up from the instruction + */ + if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) { + unsigned int real_instr; + if (unlikely(__get_user(real_instr, + (unsigned int __user *)regs->nip))) + return -EFAULT; + dsisr = make_dsisr(real_instr); + } + + /* extract the operation and registers from the dsisr */ + reg = (dsisr >> 5) & 0x1f; /* source/dest register */ + areg = dsisr & 0x1f; /* register to update */ + instr = (dsisr >> 10) & 0x7f; + instr |= (dsisr >> 13) & 0x60; + + /* Lookup the operation in our table */ + nb = aligninfo[instr].len; + flags = aligninfo[instr].flags; + + /* DAR has the operand effective address */ + addr = (unsigned char __user *)regs->dar; + + /* A size of 0 indicates an instruction we don't support, with + * the exception of DCBZ which is handled as a special case here + */ + if (instr == DCBZ) + return emulate_dcbz(regs, addr); + if (unlikely(nb == 0)) + return 0; + + /* Load/Store Multiple instructions are handled in their own + * function + */ + if (flags & M) + return emulate_multiple(regs, addr, reg, nb, flags, instr); + + /* Verify the address of the operand */ + if (unlikely(user_mode(regs) && + !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ), + addr, nb))) + return -EFAULT; + + /* Force the fprs into the save area so we can reference them */ + if (flags & F) { + /* userland only */ + if (unlikely(!user_mode(regs))) + return 0; + flush_fp_to_thread(current); + } + + /* If we are loading, get the data from user space, else + * get it from register values + */ + if (flags & LD) { + data.ll = 0; + ret = 0; + p = addr; + switch (nb) { + case 8: + ret |= __get_user(data.v[0], p++); + ret |= __get_user(data.v[1], p++); + ret |= __get_user(data.v[2], p++); + ret |= __get_user(data.v[3], p++); + case 4: + ret |= __get_user(data.v[4], p++); + ret |= __get_user(data.v[5], p++); + case 2: + ret |= __get_user(data.v[6], p++); + ret |= __get_user(data.v[7], p++); + if (unlikely(ret)) + return -EFAULT; + } + } else if (flags & F) + data.dd = current->thread.fpr[reg]; + else + data.ll = regs->gpr[reg]; + + /* Perform other misc operations like sign extension, byteswap, + * or floating point single precision conversion + */ + switch (flags & ~U) { + case LD+SE: /* sign extend */ + if ( nb == 2 ) + data.ll = data.x16.low16; + else /* nb must be 4 */ + data.ll = data.x32.low32; + break; + case LD+S: /* byte-swap */ + case ST+S: + if (nb == 2) { + SWAP(data.v[6], data.v[7]); + } else { + SWAP(data.v[4], data.v[7]); + SWAP(data.v[5], data.v[6]); + } + break; + + /* Single-precision FP load and store require conversions... */ + case LD+F+S: +#ifdef CONFIG_PPC_FPU + preempt_disable(); + enable_kernel_fp(); + cvt_fd((float *)&data.v[4], &data.dd, ¤t->thread); + preempt_enable(); +#else + return 0; +#endif + break; + case ST+F+S: +#ifdef CONFIG_PPC_FPU + preempt_disable(); + enable_kernel_fp(); + cvt_df(&data.dd, (float *)&data.v[4], ¤t->thread); + preempt_enable(); +#else + return 0; +#endif + break; + } + + /* Store result to memory or update registers */ + if (flags & ST) { + ret = 0; + p = addr; + switch (nb) { + case 8: + ret |= __put_user(data.v[0], p++); + ret |= __put_user(data.v[1], p++); + ret |= __put_user(data.v[2], p++); + ret |= __put_user(data.v[3], p++); + case 4: + ret |= __put_user(data.v[4], p++); + ret |= __put_user(data.v[5], p++); + case 2: + ret |= __put_user(data.v[6], p++); + ret |= __put_user(data.v[7], p++); + } + if (unlikely(ret)) + return -EFAULT; + } else if (flags & F) + current->thread.fpr[reg] = data.dd; + else + regs->gpr[reg] = data.ll; + + /* Update RA as needed */ + if (flags & U) + regs->gpr[areg] = regs->dar; + + return 1; +} diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b75757251994..91538d2445bf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -37,12 +37,12 @@ #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/rtas.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> #include <asm/lppaca.h> #include <asm/iseries/hv_lp_event.h> #include <asm/cache.h> -#include <asm/systemcfg.h> #include <asm/compat.h> #endif @@ -106,7 +106,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); /* paca */ @@ -252,25 +251,44 @@ int main(void) DEFINE(TASK_SIZE, TASK_SIZE); DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); -#else /* CONFIG_PPC64 */ - /* systemcfg offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); - DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); - DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); +#endif /* ! CONFIG_PPC64 */ - /* timeval/timezone offsets for use by vdso */ + /* datapage offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); + DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); +#ifdef CONFIG_PPC64 + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); +#else + DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); +#endif + /* timeval/timezone offsets for use by vdso */ DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); -#endif /* CONFIG_PPC64 */ + + /* Other bits used by the vdso */ + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(CLOCK_REALTIME_RES, TICK_NSEC); + return 0; } diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S new file mode 100644 index 000000000000..cca942fe6115 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power4.S @@ -0,0 +1,233 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +_GLOBAL(__970_cpu_preinit) + /* + * Do nothing if not running in HV mode + */ + mfmsr r0 + rldicl. r0,r0,4,63 + beqlr + + /* + * Deal only with PPC970 and PPC970FX. + */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + beq 1f + cmpwi r0,0x3c + beq 1f + cmpwi r0,0x44 + bnelr +1: + + /* Make sure HID4:rm_ci is off before MMU is turned off, that large + * pages are enabled with HID4:61 and clear HID5:DCBZ_size and + * HID5:DCBZ32_ill + */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + mfspr r3,SPRN_HID5 + rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */ + sync + mtspr SPRN_HID5,r3 + isync + sync + + /* Setup some basic HID1 features */ + mfspr r0,SPRN_HID1 + li r3,0x1200 /* enable i-fetch cacheability */ + sldi r3,r3,44 /* and prefetch */ + or r0,r0,r3 + mtspr SPRN_HID1,r0 + mtspr SPRN_HID1,r0 + isync + + /* Clear HIOR */ + li r0,0 + sync + mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ + isync + blr + +_GLOBAL(__setup_cpu_power4) + blr + +_GLOBAL(__setup_cpu_be) + /* Set large page sizes LP=0: 16MB, LP=1: 64KB */ + addi r3, 0, 0 + ori r3, r3, HID6_LB + sldi r3, r3, 32 + nor r3, r3, r3 + mfspr r4, SPRN_HID6 + and r4, r4, r3 + addi r3, 0, 0x02000 + sldi r3, r3, 32 + or r4, r4, r3 + mtspr SPRN_HID6, r4 + blr + +_GLOBAL(__setup_cpu_ppc970) + mfspr r0,SPRN_HID0 + li r11,5 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,8 /* set NAP and DPM */ + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + sync + isync + blr + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 8 +#define CS_HID4 16 +#define CS_HID5 24 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES,0 +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, HID4, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + beq 1f + cmpwi r0,0x3c + beq 1f + cmpwi r0,0x44 + bne 2f + +1: /* Save HID0,1,4 and 5 */ + mfspr r3,SPRN_HID0 + std r3,CS_HID0(r5) + mfspr r3,SPRN_HID1 + std r3,CS_HID1(r5) + mfspr r3,SPRN_HID4 + std r3,CS_HID4(r5) + mfspr r3,SPRN_HID5 + std r3,CS_HID5(r5) + +2: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Get storage ptr (FIXME when using anton reloc as we + * are running with translation disabled here + */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + beq 1f + cmpwi r0,0x3c + beq 1f + cmpwi r0,0x44 + bnelr + +1: /* Before accessing memory, we make sure rm_ci is clear */ + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + + /* Clear interrupt prefix */ + li r0,0 + sync + mtspr SPRN_HIOR,0 + isync + + /* Restore HID0 */ + ld r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + sync + isync + + /* Restore HID1 */ + ld r3,CS_HID1(r5) + sync + isync + mtspr SPRN_HID1,r3 + mtspr SPRN_HID1,r3 + sync + isync + + /* Restore HID4 */ + ld r3,CS_HID4(r5) + sync + isync + mtspr SPRN_HID4,r3 + sync + isync + + /* Restore HID5 */ + ld r3,CS_HID5(r5) + sync + isync + mtspr SPRN_HID5,r3 + sync + isync + blr + diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index cc4e9eb1c13f..1d85cedbbb7b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -52,6 +52,9 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); #define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ PPC_FEATURE_HAS_MMU) #define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) +#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS) /* We only set the spe features if the kernel was compiled with @@ -160,7 +163,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x00350000, .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, - .cpu_user_features = COMMON_USER_PPC64, + .cpu_user_features = COMMON_USER_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -175,7 +178,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x00380000, .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, - .cpu_user_features = COMMON_USER_PPC64, + .cpu_user_features = COMMON_USER_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -190,7 +193,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x00390000, .cpu_name = "PPC970", .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_PPC64 | + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, .icache_bsize = 128, .dcache_bsize = 128, @@ -212,7 +215,7 @@ struct cpu_spec cpu_specs[] = { #else .cpu_features = CPU_FTRS_PPC970, #endif - .cpu_user_features = COMMON_USER_PPC64 | + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, .icache_bsize = 128, .dcache_bsize = 128, @@ -230,7 +233,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x00440000, .cpu_name = "PPC970MP", .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_PPC64 | + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, .icache_bsize = 128, .dcache_bsize = 128, @@ -245,7 +248,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x003a0000, .cpu_name = "POWER5 (gr)", .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_PPC64, + .cpu_user_features = COMMON_USER_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -260,7 +263,7 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x003b0000, .cpu_name = "POWER5 (gs)", .cpu_features = CPU_FTRS_POWER5, - .cpu_user_features = COMMON_USER_PPC64, + .cpu_user_features = COMMON_USER_POWER5_PLUS, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -276,7 +279,7 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "Cell Broadband Engine", .cpu_features = CPU_FTRS_CELL, .cpu_user_features = COMMON_USER_PPC64 | - PPC_FEATURE_HAS_ALTIVEC_COMP, + PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP, .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c new file mode 100644 index 000000000000..7c3419656ccc --- /dev/null +++ b/arch/powerpc/kernel/dma_64.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Implements the generic device dma API for ppc64. Handles + * the pci and vio busses + */ + +#include <linux/device.h> +#include <linux/dma-mapping.h> +/* Include the busses we support */ +#include <linux/pci.h> +#include <asm/vio.h> +#include <asm/scatterlist.h> +#include <asm/bug.h> + +static struct dma_mapping_ops *get_dma_ops(struct device *dev) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return &pci_dma_ops; +#endif +#ifdef CONFIG_IBMVIO + if (dev->bus == &vio_bus_type) + return &vio_dma_ops; +#endif + return NULL; +} + +int dma_supported(struct device *dev, u64 mask) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->dma_supported(dev, mask); + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return pci_set_dma_mask(to_pci_dev(dev), dma_mask); +#endif +#ifdef CONFIG_IBMVIO + if (dev->bus == &vio_bus_type) + return -EIO; +#endif /* CONFIG_IBMVIO */ + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->alloc_coherent(dev, size, dma_handle, flag); + BUG(); + return NULL; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); + else + BUG(); +} +EXPORT_SYMBOL(dma_free_coherent); + +dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_single(dev, cpu_addr, size, direction); + BUG(); + return (dma_addr_t)0; +} +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_single(dev, dma_addr, size, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_single); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_single(dev, + (page_address(page) + offset), size, direction); + BUG(); + return (dma_addr_t)0; +} +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_single(dev, dma_address, size, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_page); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + return dma_ops->map_sg(dev, sg, nents, direction); + BUG(); + return 0; +} +EXPORT_SYMBOL(dma_map_sg); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + struct dma_mapping_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops) + dma_ops->unmap_sg(dev, sg, nhwentries, direction); + else + BUG(); +} +EXPORT_SYMBOL(dma_unmap_sg); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2d22bf03484e..bce33a38399f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -183,8 +183,8 @@ syscall_exit_trace_cont: ld r13,GPR13(r1) /* returning to usermode */ 1: ld r2,GPR2(r1) li r12,MSR_RI - andc r10,r10,r12 - mtmsrd r10,1 /* clear MSR.RI */ + andc r11,r10,r12 + mtmsrd r11,1 /* clear MSR.RI */ ld r1,GPR1(r1) mtlr r4 mtcr r5 diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c new file mode 100644 index 000000000000..65eae752a527 --- /dev/null +++ b/arch/powerpc/kernel/firmware.c @@ -0,0 +1,45 @@ +/* + * Extracted from cputable.c + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + * Copyright (C) 2005 Stephen Rothwell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> + +#include <asm/firmware.h> + +unsigned long ppc64_firmware_features; + +#ifdef CONFIG_PPC_PSERIES +firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, +}; +#endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 4d6001fa1cf2..b780b42c95fc 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -41,20 +41,20 @@ _GLOBAL(load_up_fpu) #ifndef CONFIG_SMP LOADBASE(r3, last_task_used_math) toreal(r3) - LDL r4,OFF(last_task_used_math)(r3) - CMPI 0,r4,0 + PPC_LL r4,OFF(last_task_used_math)(r3) + PPC_LCMPI 0,r4,0 beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ SAVE_32FPRS(0, r4) mffs fr0 stfd fr0,THREAD_FPSCR(r4) - LDL r5,PT_REGS(r4) + PPC_LL r5,PT_REGS(r4) toreal(r5) - LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) li r10,MSR_FP|MSR_FE0|MSR_FE1 andc r4,r4,r10 /* disable FP for previous task */ - STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: #endif /* CONFIG_SMP */ /* enable use of FP after return */ @@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) - STL r4,OFF(last_task_used_math)(r3) + PPC_STL r4,OFF(last_task_used_math)(r3) #endif /* CONFIG_SMP */ /* restore registers and return */ /* we haven't used ctr or xer or lr */ @@ -97,24 +97,24 @@ _GLOBAL(giveup_fpu) MTMSRD(r5) /* enable use of fpu now */ SYNC_601 isync - CMPI 0,r3,0 + PPC_LCMPI 0,r3,0 beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ - LDL r5,PT_REGS(r3) - CMPI 0,r5,0 + PPC_LL r5,PT_REGS(r3) + PPC_LCMPI 0,r5,0 SAVE_32FPRS(0, r3) mffs fr0 stfd fr0,THREAD_FPSCR(r3) beq 1f - LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) li r3,MSR_FP|MSR_FE0|MSR_FE1 andc r4,r4,r3 /* disable FP for previous task */ - STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) + PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: #ifndef CONFIG_SMP li r5,0 LOADBASE(r4,last_task_used_math) - STL r5,OFF(last_task_used_math)(r4) + PPC_STL r5,OFF(last_task_used_math)(r4) #endif /* CONFIG_SMP */ blr diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index b102e3a2415e..ccdf94731e30 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1100,6 +1100,7 @@ start_here: mr r3,r31 mr r4,r30 bl machine_init + bl __save_cpu_setup bl MMU_init #ifdef CONFIG_APUS diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 16ab40daa738..8a8bf79ef044 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -28,7 +28,6 @@ #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/systemcfg.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/bug.h> @@ -1697,25 +1696,14 @@ _GLOBAL(pmac_secondary_start) * SPRG3 = paca virtual address */ _GLOBAL(__secondary_start) + /* Set thread priority to MEDIUM */ + HMT_MEDIUM - HMT_MEDIUM /* Set thread priority to MEDIUM */ - + /* Load TOC */ ld r2,PACATOC(r13) - li r6,0 - stb r6,PACAPROCENABLED(r13) - -#ifndef CONFIG_PPC_ISERIES - /* Initialize the page table pointer register. */ - LOADADDR(r6,_SDR1) - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -#endif - /* Initialize the first segment table (or SLB) entry */ - ld r3,PACASTABVIRT(r13) /* get addr of segment table */ -BEGIN_FTR_SECTION - bl .stab_initialize -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) - bl .slb_initialize + + /* Do early setup for that CPU (stab, slb, hash table pointer) */ + bl .early_setup_secondary /* Initialize the kernel stack. Just a repeat for iSeries. */ LOADADDR(r3,current_set) @@ -1724,37 +1712,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD std r1,PACAKSAVE(r13) - ld r3,PACASTABREAL(r13) /* get raddr of segment table */ - ori r4,r3,1 /* turn on valid bit */ - -#ifdef CONFIG_PPC_ISERIES - li r0,-1 /* hypervisor call */ - li r3,1 - sldi r3,r3,63 /* 0x8000000000000000 */ - ori r3,r3,4 /* 0x8000000000000004 */ - sc /* HvCall_setASR */ -#else - /* set the ASR */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: -#endif + /* Clear backchain so we get nice backtraces */ li r7,0 mtlr r7 @@ -1777,6 +1735,7 @@ _GLOBAL(start_secondary_prolog) li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary + b . #endif /* @@ -1896,40 +1855,6 @@ _STATIC(start_here_multiplatform) mr r3,r31 bl .early_setup - /* set the ASR */ - ld r3,PACASTABREAL(r13) - ori r4,r3,1 /* turn on valid bit */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: - /* Set SDR1 (hash table pointer) */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - /* Test if bit 0 is set (LPAR bit) */ - andi. r3,r3,PLATFORM_LPAR - bne 98f /* branch if result is !0 */ - LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ - add r6,r6,r26 - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -98: LOADADDR(r3,.start_here_common) SET_REG_TO_CONST(r4, MSR_KERNEL) mtspr SPRN_SRR0,r3 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 5063c603fad4..8d60fa99fc4b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -24,7 +24,7 @@ * Copyright 2002-2004 MontaVista Software, Inc. * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> * Copyright 2004 Freescale Semiconductor, Inc - * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com> + * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle_64.c new file mode 100644 index 000000000000..b879d3057ef8 --- /dev/null +++ b/arch/powerpc/kernel/idle_64.c @@ -0,0 +1,121 @@ +/* + * Idle daemon for PowerPC. Idle daemon will handle any action + * that needs to be taken when the system becomes idle. + * + * Originally Written by Cort Dougan (cort@cs.nmt.edu) + * + * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> + * + * Additional shared processor, SMT, and firmware support + * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/cpu.h> +#include <linux/sysctl.h> + +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/smp.h> + +extern void power4_idle(void); + +void default_idle(void) +{ + unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); + + while (1) { + if (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + } + + ppc64_runlatch_on(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +void native_idle(void) +{ + while (1) { + ppc64_runlatch_off(); + + if (!need_resched()) + power4_idle(); + + if (need_resched()) { + ppc64_runlatch_on(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } + + if (cpu_is_offline(smp_processor_id()) && + system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +void cpu_idle(void) +{ + BUG_ON(NULL == ppc_md.idle_loop); + ppc_md.idle_loop(); +} + +int powersave_nap; + +#ifdef CONFIG_SYSCTL +/* + * Register the sysctl to set/clear powersave_nap. + */ +static ctl_table powersave_nap_ctl_table[]={ + { + .ctl_name = KERN_PPC_POWERSAVE_NAP, + .procname = "powersave-nap", + .data = &powersave_nap, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { 0, }, +}; +static ctl_table powersave_nap_sysctl_root[] = { + { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, }, + { 0,}, +}; + +static int __init +register_powersave_nap_sysctl(void) +{ + register_sysctl_table(powersave_nap_sysctl_root, 0); + + return 0; +} +__initcall(register_powersave_nap_sysctl); +#endif diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c new file mode 100644 index 000000000000..0fa3d27fef01 --- /dev/null +++ b/arch/powerpc/kernel/ioctl32.c @@ -0,0 +1,45 @@ +/* + * ioctl32.c: Conversion between 32bit and 64bit native ioctls. + * + * Based on sparc64 ioctl32.c by: + * + * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * + * ppc64 changes: + * + * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) + * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) + * + * These routines maintain argument size conversion between 32bit and 64bit + * ioctls. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define INCLUDES +#include "compat_ioctl.c" +#include <linux/syscalls.h> + +#define CODE +#include "compat_ioctl.c" + +#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, +#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) + +#define IOCTL_TABLE_START \ + struct ioctl_trans ioctl_start[] = { +#define IOCTL_TABLE_END \ + }; + +IOCTL_TABLE_START +#include <linux/compat_ioctl.h> +#define DECLARES +#include "compat_ioctl.c" + +IOCTL_TABLE_END + +int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c new file mode 100644 index 000000000000..6160c8dbb7c5 --- /dev/null +++ b/arch/powerpc/kernel/iomap.c @@ -0,0 +1,146 @@ +/* + * arch/ppc64/kernel/iomap.c + * + * ppc64 "iomap" interface implementation. + * + * (C) Copyright 2004 Linus Torvalds + */ +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include <asm/io.h> + +/* + * Here comes the ppc64 implementation of the IOMAP + * interfaces. + */ +unsigned int fastcall ioread8(void __iomem *addr) +{ + return readb(addr); +} +unsigned int fastcall ioread16(void __iomem *addr) +{ + return readw(addr); +} +unsigned int fastcall ioread16be(void __iomem *addr) +{ + return in_be16(addr); +} +unsigned int fastcall ioread32(void __iomem *addr) +{ + return readl(addr); +} +unsigned int fastcall ioread32be(void __iomem *addr) +{ + return in_be32(addr); +} +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread16be); +EXPORT_SYMBOL(ioread32); +EXPORT_SYMBOL(ioread32be); + +void fastcall iowrite8(u8 val, void __iomem *addr) +{ + writeb(val, addr); +} +void fastcall iowrite16(u16 val, void __iomem *addr) +{ + writew(val, addr); +} +void fastcall iowrite16be(u16 val, void __iomem *addr) +{ + out_be16(addr, val); +} +void fastcall iowrite32(u32 val, void __iomem *addr) +{ + writel(val, addr); +} +void fastcall iowrite32be(u32 val, void __iomem *addr) +{ + out_be32(addr, val); +} +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite16be); +EXPORT_SYMBOL(iowrite32); +EXPORT_SYMBOL(iowrite32be); + +/* + * These are the "repeat read/write" functions. Note the + * non-CPU byte order. We do things in "IO byteorder" + * here. + * + * FIXME! We could make these do EEH handling if we really + * wanted. Not clear if we do. + */ +void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insb((u8 __iomem *) addr, dst, count); +} +void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insw_ns((u16 __iomem *) addr, dst, count); +} +void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +{ + _insl_ns((u32 __iomem *) addr, dst, count); +} +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(ioread32_rep); + +void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsb((u8 __iomem *) addr, src, count); +} +void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsw_ns((u16 __iomem *) addr, src, count); +} +void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) +{ + _outsl_ns((u32 __iomem *) addr, src, count); +} +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(iowrite32_rep); + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + if (!_IO_IS_VALID(port)) + return NULL; + return (void __iomem *) (port+pci_io_base); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + if (flags & IORESOURCE_IO) + return ioport_map(start, len); + if (flags & IORESOURCE_MEM) + return ioremap(start, len); + /* What? */ + return NULL; +} + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(pci_iomap); +EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c new file mode 100644 index 000000000000..4d9b4388918b --- /dev/null +++ b/arch/powerpc/kernel/iommu.c @@ -0,0 +1,572 @@ +/* + * arch/ppc64/kernel/iommu.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup, new allocation schemes, virtual merging: + * Copyright (C) 2004 Olof Johansson, IBM Corporation + * and Ben. Herrenschmidt, IBM Corporation + * + * Dynamic DMA mapping support, bus-independent parts. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/iommu.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> + +#define DBG(...) + +#ifdef CONFIG_IOMMU_VMERGE +static int novmerge = 0; +#else +static int novmerge = 1; +#endif + +static int __init setup_iommu(char *str) +{ + if (!strcmp(str, "novmerge")) + novmerge = 1; + else if (!strcmp(str, "vmerge")) + novmerge = 0; + return 1; +} + +__setup("iommu=", setup_iommu); + +static unsigned long iommu_range_alloc(struct iommu_table *tbl, + unsigned long npages, + unsigned long *handle, + unsigned int align_order) +{ + unsigned long n, end, i, start; + unsigned long limit; + int largealloc = npages > 15; + int pass = 0; + unsigned long align_mask; + + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* This allocator was derived from x86_64's bit string search */ + + /* Sanity check */ + if (unlikely(npages) == 0) { + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; + } + + if (handle && *handle) + start = *handle; + else + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + /* Use only half of the table for small allocs (15 pages or less) */ + limit = largealloc ? tbl->it_size : tbl->it_halfpoint; + + if (largealloc && start < tbl->it_halfpoint) + start = tbl->it_halfpoint; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the initial start. + */ + if (start >= limit) + start = largealloc ? tbl->it_largehint : tbl->it_hint; + + again: + + n = find_next_zero_bit(tbl->it_map, limit, start); + + /* Align allocation */ + n = (n + align_mask) & ~align_mask; + + end = n + npages; + + if (unlikely(end >= limit)) { + if (likely(pass < 2)) { + /* First failure, just rescan the half of the table. + * Second failure, rescan the other half of the table. + */ + start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; + limit = pass ? tbl->it_size : limit; + pass++; + goto again; + } else { + /* Third failure, give up */ + return DMA_ERROR_CODE; + } + } + + for (i = n; i < end; i++) + if (test_bit(i, tbl->it_map)) { + start = i+1; + goto again; + } + + for (i = n; i < end; i++) + __set_bit(i, tbl->it_map); + + /* Bump the hint to a new block for small allocs. */ + if (largealloc) { + /* Don't bump to new block to avoid fragmentation */ + tbl->it_largehint = end; + } else { + /* Overflow will be taken care of at the next allocation */ + tbl->it_hint = (end + tbl->it_blocksize - 1) & + ~(tbl->it_blocksize - 1); + } + + /* Update handle for SG allocations */ + if (handle) + *handle = end; + + return n; +} + +static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, + unsigned int npages, enum dma_data_direction direction, + unsigned int align_order) +{ + unsigned long entry, flags; + dma_addr_t ret = DMA_ERROR_CODE; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + entry = iommu_range_alloc(tbl, npages, NULL, align_order); + + if (unlikely(entry == DMA_ERROR_CODE)) { + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } + + entry += tbl->it_offset; /* Offset into real TCE table */ + ret = entry << PAGE_SHIFT; /* Set the return dma address */ + + /* Put the TCEs in the HW table */ + ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, + direction); + + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + /* Make sure updates are seen by hardware */ + mb(); + + return ret; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + unsigned long i; + + entry = dma_addr >> PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + if (((free_entry + npages) > tbl->it_size) || + (entry < tbl->it_offset)) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_free: invalid entry\n"); + printk(KERN_INFO "\tentry = 0x%lx\n", entry); + printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); + printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); + printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno); + printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size); + printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset); + printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index); + WARN_ON(1); + } + return; + } + + ppc_md.tce_free(tbl, entry, npages); + + for (i = 0; i < npages; i++) + __clear_bit(free_entry+i, tbl->it_map); +} + +static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long flags; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + __iommu_free(tbl, dma_addr, npages); + + /* Make sure TLB cache is flushed if the HW needs it. We do + * not do an mb() here on purpose, it is not needed on any of + * the current platforms. + */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); +} + +int iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) +{ + dma_addr_t dma_next = 0, dma_addr; + unsigned long flags; + struct scatterlist *s, *outs, *segstart; + int outcount, incount; + unsigned long handle; + + BUG_ON(direction == DMA_NONE); + + if ((nelems == 0) || !tbl) + return 0; + + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + DBG("mapping %d elements:\n", nelems); + + spin_lock_irqsave(&(tbl->it_lock), flags); + + for (s = outs; nelems; nelems--, s++) { + unsigned long vaddr, npages, entry, slen; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + vaddr = (unsigned long)page_address(s->page) + s->offset; + npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); + npages >>= PAGE_SHIFT; + entry = iommu_range_alloc(tbl, npages, &handle, 0); + + DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" + " npages %lx\n", tbl, vaddr, npages); + goto failure; + } + + /* Convert entry to a dma_addr_t */ + entry += tbl->it_offset; + dma_addr = entry << PAGE_SHIFT; + dma_addr |= s->offset; + + DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n", + npages, entry, dma_addr); + + /* Insert into HW table */ + ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction); + + /* If we are in an open segment, try merging */ + if (segstart != s) { + DBG(" - trying merge...\n"); + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if (novmerge || (dma_addr != dma_next)) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; outs++; + DBG(" can't merge, new segment.\n"); + } else { + outs->dma_length += s->length; + DBG(" merged, new len: %lx\n", outs->dma_length); + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + DBG(" - filling new segment.\n"); + outs->dma_address = dma_addr; + outs->dma_length = slen; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; + + DBG(" - dma next is: %lx\n", dma_next); + } + + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); + + /* Make sure updates are seen by hardware */ + mb(); + + DBG("mapped %d elements:\n", outcount); + + /* For the sake of iommu_unmap_sg, we clear out the length in the + * next entry of the sglist if we didn't fill the list completely + */ + if (outcount < incount) { + outs++; + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + return outcount; + + failure: + for (s = &sglist[0]; s <= outs; s++) { + if (s->dma_length != 0) { + unsigned long vaddr, npages; + + vaddr = s->dma_address & PAGE_MASK; + npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr) + >> PAGE_SHIFT; + __iommu_free(tbl, vaddr, npages); + } + } + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return 0; +} + + +void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + unsigned long flags; + + BUG_ON(direction == DMA_NONE); + + if (!tbl) + return; + + spin_lock_irqsave(&(tbl->it_lock), flags); + + while (nelems--) { + unsigned int npages; + dma_addr_t dma_handle = sglist->dma_address; + + if (sglist->dma_length == 0) + break; + npages = (PAGE_ALIGN(dma_handle + sglist->dma_length) + - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT; + __iommu_free(tbl, dma_handle, npages); + sglist++; + } + + /* Flush/invalidate TLBs if necessary. As for iommu_free(), we + * do not do an mb() here, the affected platforms do not need it + * when freeing. + */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + spin_unlock_irqrestore(&(tbl->it_lock), flags); +} + +/* + * Build a iommu_table structure. This contains a bit map which + * is used to manage allocation of the tce space. + */ +struct iommu_table *iommu_init_table(struct iommu_table *tbl) +{ + unsigned long sz; + static int welcomed = 0; + + /* Set aside 1/4 of the table for large allocations. */ + tbl->it_halfpoint = tbl->it_size * 3 / 4; + + /* number of bytes needed for the bitmap */ + sz = (tbl->it_size + 7) >> 3; + + tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz)); + if (!tbl->it_map) + panic("iommu_init_table: Can't allocate %ld bytes\n", sz); + + memset(tbl->it_map, 0, sz); + + tbl->it_hint = 0; + tbl->it_largehint = tbl->it_halfpoint; + spin_lock_init(&tbl->it_lock); + + /* Clear the hardware table in case firmware left allocations in it */ + ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); + + if (!welcomed) { + printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", + novmerge ? "disabled" : "enabled"); + welcomed = 1; + } + + return tbl; +} + +void iommu_free_table(struct device_node *dn) +{ + struct pci_dn *pdn = dn->data; + struct iommu_table *tbl = pdn->iommu_table; + unsigned long bitmap_sz, i; + unsigned int order; + + if (!tbl || !tbl->it_map) { + printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__, + dn->full_name); + return; + } + + /* verify that table contains no entries */ + /* it_size is in entries, and we're examining 64 at a time */ + for (i = 0; i < (tbl->it_size/64); i++) { + if (tbl->it_map[i] != 0) { + printk(KERN_WARNING "%s: Unexpected TCEs for %s\n", + __FUNCTION__, dn->full_name); + break; + } + } + + /* calculate bitmap size in bytes */ + bitmap_sz = (tbl->it_size + 7) / 8; + + /* free bitmap */ + order = get_order(bitmap_sz); + free_pages((unsigned long) tbl->it_map, order); + + /* free table */ + kfree(tbl); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address of the buffer + * passed here is the kernel (virtual) address of the buffer. The buffer + * need not be page aligned, the dma_addr_t returned will point to the same + * byte within the page as vaddr. + */ +dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + dma_addr_t dma_handle = DMA_ERROR_CODE; + unsigned long uaddr; + unsigned int npages; + + BUG_ON(direction == DMA_NONE); + + uaddr = (unsigned long)vaddr; + npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK); + npages >>= PAGE_SHIFT; + + if (tbl) { + dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0); + if (dma_handle == DMA_ERROR_CODE) { + if (printk_ratelimit()) { + printk(KERN_INFO "iommu_alloc failed, " + "tbl %p vaddr %p npages %d\n", + tbl, vaddr, npages); + } + } else + dma_handle |= (uaddr & ~PAGE_MASK); + } + + return dma_handle; +} + +void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + if (tbl) + iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) - + (dma_handle & PAGE_MASK)) >> PAGE_SHIFT); +} + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + void *ret = NULL; + dma_addr_t mapping; + unsigned int npages, order; + + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + order = get_order(size); + + /* + * Client asked for way too much space. This is checked later + * anyway. It is easier to debug here for the drivers than in + * the tce tables. + */ + if (order >= IOMAP_MAX_ORDER) { + printk("iommu_alloc_consistent size too large: 0x%lx\n", size); + return NULL; + } + + if (!tbl) + return NULL; + + /* Alloc enough pages (and possibly more) */ + ret = (void *)__get_free_pages(flag, order); + if (!ret) + return NULL; + memset(ret, 0, size); + + /* Set up tces to cover the allocated range */ + mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order); + if (mapping == DMA_ERROR_CODE) { + free_pages((unsigned long)ret, order); + ret = NULL; + } else + *dma_handle = mapping; + return ret; +} + +void iommu_free_coherent(struct iommu_table *tbl, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + unsigned int npages; + + if (tbl) { + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + iommu_free(tbl, dma_handle, npages); + free_pages((unsigned long)vaddr, get_order(size)); + } +} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c new file mode 100644 index 000000000000..5a71ed9612fe --- /dev/null +++ b/arch/powerpc/kernel/irq.c @@ -0,0 +1,479 @@ +/* + * arch/ppc/kernel/irq.c + * + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan <cort@fsmlabs.com> + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the + * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit + * mask register (of which only 16 are defined), hence the weird shifting + * and complement of the cached_irq_mask. I want to be able to stuff + * this right into the SIU SMASK register. + * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx + * to reduce code space and undefined function references. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/threads.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/proc_fs.h> +#include <linux/random.h> +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#ifdef CONFIG_PPC64 +#include <linux/kallsyms.h> +#endif + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/cache.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/machdep.h> +#ifdef CONFIG_PPC64 +#include <asm/iseries/it_lp_queue.h> +#include <asm/paca.h> +#endif + +int __irq_offset_value; +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(__irq_offset_value); +#endif + +static int ppc_spurious_interrupts; + +#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) +extern void iSeries_smp_message_recv(struct pt_regs *); +#endif + +#ifdef CONFIG_PPC32 +#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) + +unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; +atomic_t ppc_n_lost_interrupts; + +#ifdef CONFIG_TAU_INT +extern int tau_initialized; +extern int tau_interrupts(int); +#endif + +#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) +extern atomic_t ipi_recv; +extern atomic_t ipi_sent; +#endif +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(irq_desc); + +int distribute_irqs = 1; +u64 ppc64_interrupt_controller; +#endif /* CONFIG_PPC64 */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *)v, j; + struct irqaction *action; + irq_desc_t *desc; + unsigned long flags; + + if (i == 0) { + seq_puts(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ", j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + desc = get_irq_desc(i); + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; + if (!action || !action->handler) + goto skip; + seq_printf(p, "%3d: ", i); +#ifdef CONFIG_SMP + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#else + seq_printf(p, "%10u ", kstat_irqs(i)); +#endif /* CONFIG_SMP */ + if (desc->handler) + seq_printf(p, " %s ", desc->handler->typename); + else + seq_puts(p, " None "); + seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&desc->lock, flags); + } else if (i == NR_IRQS) { +#ifdef CONFIG_PPC32 +#ifdef CONFIG_TAU_INT + if (tau_initialized){ + seq_puts(p, "TAU: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", tau_interrupts(j)); + seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); + } +#endif +#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) + /* should this be per processor send/receive? */ + seq_printf(p, "IPI (recv/sent): %10u/%u\n", + atomic_read(&ipi_recv), atomic_read(&ipi_sent)); +#endif +#endif /* CONFIG_PPC32 */ + seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); + } + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for_each_irq(irq) { + cpumask_t mask; + + if (irq_desc[irq].status & IRQ_PER_CPU) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif + +#ifdef CONFIG_PPC_ISERIES +void do_IRQ(struct pt_regs *regs) +{ + struct paca_struct *lpaca; + + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 2KB free? */ + { + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + lpaca = get_paca(); +#ifdef CONFIG_SMP + if (lpaca->lppaca.int_dword.fields.ipi_cnt) { + lpaca->lppaca.int_dword.fields.ipi_cnt = 0; + iSeries_smp_message_recv(regs); + } +#endif /* CONFIG_SMP */ + if (hvlpevent_is_pending()) + process_hvlpevents(regs); + + irq_exit(); + + if (lpaca->lppaca.int_dword.fields.decr_int) { + lpaca->lppaca.int_dword.fields.decr_int = 0; + /* Signal a fake decrementer interrupt */ + timer_interrupt(regs); + } +} + +#else /* CONFIG_PPC_ISERIES */ + +void do_IRQ(struct pt_regs *regs) +{ + int irq; +#ifdef CONFIG_IRQSTACKS + struct thread_info *curtp, *irqtp; +#endif + + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 2KB free? */ + { + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + /* + * Every platform is required to implement ppc_md.get_irq. + * This function will either return an irq number or -1 to + * indicate there are no more pending. + * The value -2 is for buggy hardware and means that this IRQ + * has already been handled. -- Tom + */ + irq = ppc_md.get_irq(regs); + + if (irq >= 0) { +#ifdef CONFIG_IRQSTACKS + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[smp_processor_id()]; + if (curtp != irqtp) { + irqtp->task = curtp->task; + irqtp->flags = 0; + call___do_IRQ(irq, regs, irqtp); + irqtp->task = NULL; + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + } else +#endif + __do_IRQ(irq, regs); + } else +#ifdef CONFIG_PPC32 + if (irq != -2) +#endif + /* That's not SMP safe ... but who cares ? */ + ppc_spurious_interrupts++; + irq_exit(); +} + +#endif /* CONFIG_PPC_ISERIES */ + +void __init init_IRQ(void) +{ +#ifdef CONFIG_PPC64 + static int once = 0; + + if (once) + return; + + once++; + +#endif + ppc_md.init_IRQ(); +#ifdef CONFIG_PPC64 + irq_ctx_init(); +#endif +} + +#ifdef CONFIG_PPC64 +/* + * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. + */ + +#define UNDEFINED_IRQ 0xffffffff +unsigned int virt_irq_to_real_map[NR_IRQS]; + +/* + * Don't use virtual irqs 0, 1, 2 for devices. + * The pcnet32 driver considers interrupt numbers < 2 to be invalid, + * and 2 is the XICS IPI interrupt. + * We limit virtual irqs to 17 less than NR_IRQS so that when we + * offset them by 16 (to reserve the first 16 for ISA interrupts) + * we don't end up with an interrupt number >= NR_IRQS. + */ +#define MIN_VIRT_IRQ 3 +#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1) +#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1) + +void +virt_irq_init(void) +{ + int i; + for (i = 0; i < NR_IRQS; i++) + virt_irq_to_real_map[i] = UNDEFINED_IRQ; +} + +/* Create a mapping for a real_irq if it doesn't already exist. + * Return the virtual irq as a convenience. + */ +int virt_irq_create_mapping(unsigned int real_irq) +{ + unsigned int virq, first_virq; + static int warned; + + if (ppc64_interrupt_controller == IC_OPEN_PIC) + return real_irq; /* no mapping for openpic (for now) */ + + if (ppc64_interrupt_controller == IC_CELL_PIC) + return real_irq; /* no mapping for iic either */ + + /* don't map interrupts < MIN_VIRT_IRQ */ + if (real_irq < MIN_VIRT_IRQ) { + virt_irq_to_real_map[real_irq] = real_irq; + return real_irq; + } + + /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */ + virq = real_irq; + if (virq > MAX_VIRT_IRQ) + virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; + + /* search for this number or a free slot */ + first_virq = virq; + while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { + if (virt_irq_to_real_map[virq] == real_irq) + return virq; + if (++virq > MAX_VIRT_IRQ) + virq = MIN_VIRT_IRQ; + if (virq == first_virq) + goto nospace; /* oops, no free slots */ + } + + virt_irq_to_real_map[virq] = real_irq; + return virq; + + nospace: + if (!warned) { + printk(KERN_CRIT "Interrupt table is full\n"); + printk(KERN_CRIT "Increase NR_IRQS (currently %d) " + "in your kernel sources and rebuild.\n", NR_IRQS); + warned = 1; + } + return NO_IRQ; +} + +/* + * In most cases will get a hit on the very first slot checked in the + * virt_irq_to_real_map. Only when there are a large number of + * IRQs will this be expensive. + */ +unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) +{ + unsigned int virq; + unsigned int first_virq; + + virq = real_irq; + + if (virq > MAX_VIRT_IRQ) + virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; + + first_virq = virq; + + do { + if (virt_irq_to_real_map[virq] == real_irq) + return virq; + + virq++; + + if (virq >= MAX_VIRT_IRQ) + virq = 0; + + } while (first_virq != virq); + + return NO_IRQ; + +} + +#ifdef CONFIG_IRQSTACKS +struct thread_info *softirq_ctx[NR_CPUS]; +struct thread_info *hardirq_ctx[NR_CPUS]; + +void irq_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_cpu(i) { + memset((void *)softirq_ctx[i], 0, THREAD_SIZE); + tp = softirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = SOFTIRQ_OFFSET; + + memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); + tp = hardirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; + } +} + +void do_softirq(void) +{ + unsigned long flags; + struct thread_info *curtp, *irqtp; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + call_do_softirq(irqtp); + irqtp->task = NULL; + } + + local_irq_restore(flags); +} +EXPORT_SYMBOL(do_softirq); + +#endif /* CONFIG_IRQSTACKS */ + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c new file mode 100644 index 000000000000..5368f9c2e6bf --- /dev/null +++ b/arch/powerpc/kernel/kprobes.c @@ -0,0 +1,459 @@ +/* + * Kernel Probes (KProbes) + * arch/ppc64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel + * Probes initial implementation ( includes contributions from + * Rusty Russell). + * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes + * interface to access function arguments. + * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port + * for PPC64 + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/preempt.h> +#include <asm/cacheflush.h> +#include <asm/kdebug.h> +#include <asm/sstep.h> + +static DECLARE_MUTEX(kprobe_mutex); +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + kprobe_opcode_t insn = *p->addr; + + if ((unsigned long)p->addr & 0x03) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { + printk("Cannot register a kprobe on rfid or mtmsrd\n"); + ret = -EINVAL; + } + + /* insn must be on a special executable page on ppc64 */ + if (!ret) { + down(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + up(&kprobe_mutex); + if (!p->ainsn.insn) + ret = -ENOMEM; + } + return ret; +} + +void __kprobes arch_copy_kprobe(struct kprobe *p) +{ + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + down(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + up(&kprobe_mutex); +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + kprobe_opcode_t insn = *p->ainsn.insn; + + regs->msr |= MSR_SE; + + /* single step inline if it is a trap variant */ + if (is_trap(insn)) + regs->nip = (unsigned long)p->addr; + else + regs->nip = (unsigned long)p->ainsn.insn; +} + +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_msr = regs->msr; +} + +/* Called with kretprobe_lock held */ +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *)regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)kretprobe_trampoline; + add_rp_inst(ri); + } else { + rp->nmissed++; + } +} + +static inline int kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + unsigned int *addr = (unsigned int *)regs->nip; + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + kprobe_opcode_t insn = *p->ainsn.insn; + if (kcb->kprobe_status == KPROBE_HIT_SS && + is_trap(insn)) { + regs->msr &= ~MSR_SE; + regs->msr |= kcb->kprobe_saved_msr; + goto no_kprobe; + } + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_saved_msr = regs->msr; + kprobes_inc_nmissed_count(p); + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*addr != BREAKPOINT_INSTRUCTION) { + /* + * PowerPC has multiple variants of the "trap" + * instruction. If the current instruction is a + * trap variant, it could belong to someone else + */ + kprobe_opcode_t cur_insn = *addr; + if (is_trap(cur_insn)) + goto no_kprobe; + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p, regs, kcb); + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +void kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "nop\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + + spin_lock_irqsave(&kretprobe_lock, flags); + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->nip = orig_ret_address; + + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); + preempt_enable_no_resched(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + unsigned int insn = *p->ainsn.insn; + + regs->nip = (unsigned long)p->addr; + ret = emulate_step(regs, insn); + if (ret == 0) + regs->nip = (unsigned long)p->addr + 4; +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + regs->msr |= kcb->kprobe_saved_msr; + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, msr + * will have SE set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->msr & MSR_SE) + return 0; + + return 1; +} + +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); + regs->msr &= ~MSR_SE; + regs->msr |= kcb->kprobe_saved_msr; + + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BPT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && + kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); + regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); + + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile("trap" ::: "memory"); +} + +void __kprobes jprobe_return_end(void) +{ +}; + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... + */ + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + preempt_enable_no_resched(); + return 1; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c new file mode 100644 index 000000000000..9dda16ccde78 --- /dev/null +++ b/arch/powerpc/kernel/lparcfg.c @@ -0,0 +1,608 @@ +/* + * PowerPC64 LPAR Configuration Information Driver + * + * Dave Engebretsen engebret@us.ibm.com + * Copyright (c) 2003 Dave Engebretsen + * Will Schmidt willschm@us.ibm.com + * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This driver creates a proc file at /proc/ppc64/lparcfg which contains + * keyword - value pairs that specify the configuration of the partition. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <asm/uaccess.h> +#include <asm/iseries/hv_lp_config.h> +#include <asm/lppaca.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/system.h> +#include <asm/time.h> +#include <asm/iseries/it_exp_vpd_panel.h> +#include <asm/prom.h> +#include <asm/vdso_datapage.h> + +#define MODULE_VERS "1.6" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +static struct proc_dir_entry *proc_ppc64_lparcfg; +#define LPARCFG_BUFF_SIZE 4096 + +#ifdef CONFIG_PPC_ISERIES + +/* + * For iSeries legacy systems, the PPA purr function is available from the + * emulated_time_base field in the paca. + */ +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + struct paca_struct *lpaca; + + for_each_cpu(cpu) { + lpaca = paca + cpu; + sum_purr += lpaca->lppaca.emulated_time_base; + +#ifdef PURR_DEBUG + printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n", + cpu, lpaca->lppaca.emulated_time_base); +#endif + } + return sum_purr; +} + +#define lparcfg_write NULL + +/* + * Methods used to fetch LPAR data when running on an iSeries platform. + */ +static int lparcfg_data(struct seq_file *m, void *v) +{ + unsigned long pool_id, lp_index; + int shared, entitled_capacity, max_entitled_capacity; + int processors, max_processors; + struct paca_struct *lpaca = get_paca(); + unsigned long purr = get_purr(); + + seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + + shared = (int)(lpaca->lppaca_ptr->shared_proc); + seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n", + e2a(xItExtVpdPanel.mfgID[2]), + e2a(xItExtVpdPanel.mfgID[3]), + e2a(xItExtVpdPanel.systemSerial[1]), + e2a(xItExtVpdPanel.systemSerial[2]), + e2a(xItExtVpdPanel.systemSerial[3]), + e2a(xItExtVpdPanel.systemSerial[4]), + e2a(xItExtVpdPanel.systemSerial[5])); + + seq_printf(m, "system_type=%c%c%c%c\n", + e2a(xItExtVpdPanel.machineType[0]), + e2a(xItExtVpdPanel.machineType[1]), + e2a(xItExtVpdPanel.machineType[2]), + e2a(xItExtVpdPanel.machineType[3])); + + lp_index = HvLpConfig_getLpIndex(); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + seq_printf(m, "system_active_processors=%d\n", + (int)HvLpConfig_getSystemPhysicalProcessors()); + + seq_printf(m, "system_potential_processors=%d\n", + (int)HvLpConfig_getSystemPhysicalProcessors()); + + processors = (int)HvLpConfig_getPhysicalProcessors(); + seq_printf(m, "partition_active_processors=%d\n", processors); + + max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); + seq_printf(m, "partition_potential_processors=%d\n", max_processors); + + if (shared) { + entitled_capacity = HvLpConfig_getSharedProcUnits(); + max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); + } else { + entitled_capacity = processors * 100; + max_entitled_capacity = max_processors * 100; + } + seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + max_entitled_capacity); + + if (shared) { + pool_id = HvLpConfig_getSharedPoolIndex(); + seq_printf(m, "pool=%d\n", (int)pool_id); + seq_printf(m, "pool_capacity=%d\n", + (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * + 100)); + seq_printf(m, "purr=%ld\n", purr); + } + + seq_printf(m, "shared_processor_mode=%d\n", shared); + + return 0; +} +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_PSERIES +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ +/* find a better place for this function... */ +static void log_plpar_hcall_return(unsigned long rc, char *tag) +{ + if (rc == 0) /* success, return */ + return; +/* check for null tag ? */ + if (rc == H_Hardware) + printk(KERN_INFO + "plpar-hcall (%s) failed with hardware fault\n", tag); + else if (rc == H_Function) + printk(KERN_INFO + "plpar-hcall (%s) failed; function not allowed\n", tag); + else if (rc == H_Authority) + printk(KERN_INFO + "plpar-hcall (%s) failed; not authorized to this function\n", + tag); + else if (rc == H_Parameter) + printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", + tag); + else + printk(KERN_INFO + "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", + tag, rc); + +} + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + */ +static unsigned int h_get_ppp(unsigned long *entitled, + unsigned long *unallocated, + unsigned long *aggregation, + unsigned long *resource) +{ + unsigned long rc; + rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated, + aggregation, resource); + + log_plpar_hcall_return(rc, "H_GET_PPP"); + + return rc; +} + +static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) +{ + unsigned long rc; + unsigned long dummy; + rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy); + + if (rc != H_Authority) + log_plpar_hcall_return(rc, "H_PIC"); +} + +/* Track sum of all purrs across all processors. This is used to further */ +/* calculate usage values by different applications */ + +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + struct cpu_usage *cu; + + for_each_cpu(cpu) { + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; + } + return sum_purr; +} + +#define SPLPAR_CHARACTERISTICS_TOKEN 20 +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. + */ +static void parse_system_parameter_string(struct seq_file *m) +{ + int call_status; + + char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!local_buffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + __FILE__, __FUNCTION__, __LINE__); + return; + } + + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf)); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + spin_unlock(&rtas_data_buf_lock); + + if (call_status != 0) { + printk(KERN_INFO + "%s %s Error calling get-system-parameter (0x%x)\n", + __FILE__, __FUNCTION__, call_status); + } else { + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!workbuffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + __FILE__, __FUNCTION__, __LINE__); + kfree(local_buffer); + return; + } +#ifdef LPARCFG_DEBUG + printk(KERN_INFO "success calling get-system-parameter \n"); +#endif + splpar_strlen = local_buffer[0] * 16 + local_buffer[1]; + local_buffer += 2; /* step over strlen value */ + + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } + kfree(local_buffer); +} + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. + */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn = NULL; + int count = 0; + + while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p \n", cpus_dn); +#endif + count++; + } + return count; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + unsigned int *lp_index_ptr, lp_index = 0; + struct device_node *rtas_node; + int *lrdrp; + + rootdn = find_path_device("/"); + if (rootdn) { + model = get_property(rootdn, "model", NULL); + system_id = get_property(rootdn, "system-id", NULL); + lp_index_ptr = (unsigned int *) + get_property(rootdn, "ibm,partition-no", NULL); + if (lp_index_ptr) + lp_index = *lp_index_ptr; + } + + seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + + seq_printf(m, "serial_number=%s\n", system_id); + + seq_printf(m, "system_type=%s\n", model); + + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + rtas_node = find_path_device("/rtas"); + lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = vdso_data->processorCount; + } else { + partition_potential_processors = *(lrdrp + 4); + } + + partition_active_processors = lparcfg_count_active_processors(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + unsigned long h_entitled, h_unallocated; + unsigned long h_aggregation, h_resource; + unsigned long pool_idle_time, pool_procs; + unsigned long purr; + + h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, + &h_resource); + + seq_printf(m, "R4=0x%lx\n", h_entitled); + seq_printf(m, "R5=0x%lx\n", h_unallocated); + seq_printf(m, "R6=0x%lx\n", h_aggregation); + seq_printf(m, "R7=0x%lx\n", h_resource); + + purr = get_purr(); + + /* this call handles the ibm,get-system-parameter contents */ + parse_system_parameter_string(m); + + seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); + + seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); + + seq_printf(m, "system_active_processors=%ld\n", + (h_resource >> 0 * 8) & 0xffff); + + /* pool related entries are apropriate for shared configs */ + if (paca[0].lppaca.shared_proc) { + + h_pic(&pool_idle_time, &pool_procs); + + seq_printf(m, "pool=%ld\n", + (h_aggregation >> 0 * 8) & 0xffff); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%ld\n", + ((h_resource >> 2 * 8) & 0xffff) * 100); + + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%ld\n", + (h_resource >> 4 * 8) & 0xFF); + + seq_printf(m, "capacity_weight=%ld\n", + (h_resource >> 5 * 8) & 0xFF); + + seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); + + seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); + + seq_printf(m, "purr=%ld\n", purr); + + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc); + + return 0; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. + */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + char *kbuf; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + + unsigned long current_entitled; /* parameters for h_get_ppp */ + unsigned long dummy; + unsigned long resource; + u8 current_weight; + + ssize_t retval = -ENOMEM; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + goto out; + + retval = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + + retval = -EINVAL; + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + goto out; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_weight_ptr = ¤t_weight; + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_entitled_ptr = ¤t_entitled; + } else + goto out; + + /* Get our current parameters */ + retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); + if (retval) { + retval = -EIO; + goto out; + } + + current_weight = (resource >> 5 * 8) & 0xFF; + + pr_debug("%s: current_entitled = %lu, current_weight = %lu\n", + __FUNCTION__, current_entitled, current_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %lu\n", + __FUNCTION__, *new_entitled_ptr, *new_weight_ptr); + + retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, + *new_weight_ptr); + + if (retval == H_Success || retval == H_Constrained) { + retval = count; + } else if (retval == H_Busy) { + retval = -EBUSY; + } else if (retval == H_Hardware) { + retval = -EIO; + } else if (retval == H_Parameter) { + retval = -EINVAL; + } else { + printk(KERN_WARNING "%s: received unknown hv return code %ld", + __FUNCTION__, retval); + retval = -EIO; + } + +out: + kfree(kbuf); + return retval; +} + +#endif /* CONFIG_PPC_PSERIES */ + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +struct file_operations lparcfg_fops = { + .owner = THIS_MODULE, + .read = seq_read, + .open = lparcfg_open, + .release = single_release, +}; + +int __init lparcfg_init(void) +{ + struct proc_dir_entry *ent; + mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + lparcfg_fops.write = lparcfg_write; + mode |= S_IWUSR; + } + + ent = create_proc_entry("ppc64/lparcfg", mode, NULL); + if (ent) { + ent->proc_fops = &lparcfg_fops; + ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL); + if (!ent->data) { + printk(KERN_ERR + "Failed to allocate buffer for lparcfg\n"); + remove_proc_entry("lparcfg", ent->parent); + return -ENOMEM; + } + } else { + printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); + return -EIO; + } + + proc_ppc64_lparcfg = ent; + return 0; +} + +void __exit lparcfg_cleanup(void) +{ + if (proc_ppc64_lparcfg) { + kfree(proc_ppc64_lparcfg->data); + remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); + } +} + +module_init(lparcfg_init); +module_exit(lparcfg_cleanup); +MODULE_DESCRIPTION("Interface for LPAR configuration data"); +MODULE_AUTHOR("Dave Engebretsen"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c new file mode 100644 index 000000000000..97c51e452be7 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -0,0 +1,358 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include <linux/cpumask.h> +#include <linux/kexec.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/current.h> +#include <asm/machdep.h> +#include <asm/cacheflush.h> +#include <asm/paca.h> +#include <asm/mmu.h> +#include <asm/sections.h> /* _end */ +#include <asm/prom.h> +#include <asm/smp.h> + +#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ + +/* Have this around till we move it into crash specific file */ +note_buf_t crash_notes[NR_CPUS]; + +/* Dummy for now. Not sure if we need to have a crash shutdown in here + * and if what it will achieve. Letting it be now to compile the code + * in generic kexec environment + */ +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* do nothing right now */ + /* smp_relase_cpus() if we want smp on panic kernel */ + /* cpu_irq_down to isolate us until we are ready */ +} + +int machine_kexec_prepare(struct kimage *image) +{ + int i; + unsigned long begin, end; /* limits of segment */ + unsigned long low, high; /* limits of blocked memory range */ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + if (!ppc_md.hpte_clear_all) + return -ENOENT; + + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i < image->nr_segments; i++) + if (image->segment[i].mem < __pa(_end)) + return -ETXTBSY; + + /* + * For non-LPAR, we absolutely can not overwrite the mmu hash + * table, since we are still using the bolted entries in it to + * do the copy. Check that here. + * + * It is safe if the end is below the start of the blocked + * region (end <= low), or if the beginning is after the + * end of the blocked region (begin >= high). Use the + * boolean identity !(a || b) === (!a && !b). + */ + if (htab_address) { + low = __pa(htab_address); + high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + /* We also should not overwrite the tce tables */ + for (node = of_find_node_by_type(NULL, "pci"); node != NULL; + node = of_find_node_by_type(node, "pci")) { + basep = (unsigned long *)get_property(node, "linux,tce-base", + NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", + NULL); + if (basep == NULL || sizep == NULL) + continue; + + low = *basep; + high = low + (*sizep); + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + /* we do nothing in prepare that needs to be undone */ +} + +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) + +static void copy_segments(unsigned long ind) +{ + unsigned long entry; + unsigned long *ptr; + void *dest; + void *addr; + + /* + * We rely on kexec_load to create a lists that properly + * initializes these pointers before they are used. + * We will still crash if the list is wrong, but at least + * the compiler will be quiet. + */ + ptr = NULL; + dest = NULL; + + for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { + addr = __va(entry & PAGE_MASK); + + switch (entry & IND_FLAGS) { + case IND_DESTINATION: + dest = addr; + break; + case IND_INDIRECTION: + ptr = addr; + break; + case IND_SOURCE: + copy_page(dest, addr); + dest += PAGE_SIZE; + } + } +} + +void kexec_copy_flush(struct kimage *image) +{ + long i, nr_segments = image->nr_segments; + struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; + + /* save the ranges on the stack to efficiently flush the icache */ + memcpy(ranges, image->segment, sizeof(ranges)); + + /* + * After this call we may not use anything allocated in dynamic + * memory, including *image. + * + * Only globals and the stack are allowed. + */ + copy_segments(image->head); + + /* + * we need to clear the icache for all dest pages sometime, + * including ones that were in place on the original copy + */ + for (i = 0; i < nr_segments; i++) + flush_icache_range(ranges[i].mem + KERNELBASE, + ranges[i].mem + KERNELBASE + + ranges[i].memsz); +} + +#ifdef CONFIG_SMP + +/* FIXME: we should schedule this function to be called on all cpus based + * on calling the interrupts, but we would like to call it off irq level + * so that the interrupt controller is clean. + */ +void kexec_smp_down(void *arg) +{ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 1); + + local_irq_disable(); + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void kexec_prepare_cpus(void) +{ + int my_cpu, i, notified=-1; + + smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); + my_cpu = get_cpu(); + + /* check the others cpus are now down (via paca hw cpu id == -1) */ + for (i=0; i < NR_CPUS; i++) { + if (i == my_cpu) + continue; + + while (paca[i].hw_cpu_id != -1) { + barrier(); + if (!cpu_possible(i)) { + printk("kexec: cpu %d hw_cpu_id %d is not" + " possible, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (!cpu_online(i)) { + /* Fixme: this can be spinning in + * pSeries_secondary_wait with a paca + * waiting for it to go online. + */ + printk("kexec: cpu %d hw_cpu_id %d is not" + " online, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (i != notified) { + printk( "kexec: waiting for cpu %d (physical" + " %d) to go down\n", + i, paca[i].hw_cpu_id); + notified = i; + } + } + } + + /* after we tell the others to go down */ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + + put_cpu(); + + local_irq_disable(); +} + +#else /* ! SMP */ + +static void kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + * + * We need to release the cpus if we are ever going from an + * UP to an SMP kernel. + */ + smp_release_cpus(); + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + local_irq_disable(); +} + +#endif /* SMP */ + +/* + * kexec thread structure and stack. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. It also must be statically allocated + * or allocated as part of the kimage, because everything else may be + * overwritten when we copy the kexec image. We piggyback on the + * "init_task" linker section here to statically allocate a stack. + * + * We could use a smaller stack if we don't care about anything using + * current, but that audit has not been performed. + */ +union thread_union kexec_stack + __attribute__((__section__(".data.init_task"))) = { }; + +/* Our assembly helper, in kexec_stub.S */ +extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) ATTRIB_NORET; + +/* too late to fail here */ +void machine_kexec(struct kimage *image) +{ + + /* prepare control code if any */ + + /* shutdown other cpus into our wait loop and quiesce interrupts */ + kexec_prepare_cpus(); + + /* switch to a staticly allocated stack. Based on irq stack code. + * XXX: the task struct will likely be invalid once we do the copy! + */ + kexec_stack.thread_info.task = current_thread_info()->task; + kexec_stack.thread_info.flags = 0; + + /* Some things are best done in assembly. Finding globals with + * a toc is easier in C, so pass in what we can. + */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + ppc_md.hpte_clear_all); + /* NOTREACHED */ +} + +/* Values we need to export to the second kernel via the device tree. */ +static unsigned long htab_base, htab_size, kernel_end; + +static struct property htab_base_prop = { + .name = "linux,htab-base", + .length = sizeof(unsigned long), + .value = (unsigned char *)&htab_base, +}; + +static struct property htab_size_prop = { + .name = "linux,htab-size", + .length = sizeof(unsigned long), + .value = (unsigned char *)&htab_size, +}; + +static struct property kernel_end_prop = { + .name = "linux,kernel-end", + .length = sizeof(unsigned long), + .value = (unsigned char *)&kernel_end, +}; + +static void __init export_htab_values(void) +{ + struct device_node *node; + + node = of_find_node_by_path("/chosen"); + if (!node) + return; + + kernel_end = __pa(_end); + prom_add_property(node, &kernel_end_prop); + + /* On machines with no htab htab_address is NULL */ + if (NULL == htab_address) + goto out; + + htab_base = __pa(htab_address); + prom_add_property(node, &htab_base_prop); + + htab_size = 1UL << ppc64_pft_size; + prom_add_property(node, &htab_size_prop); + + out: + of_node_put(node); +} + +void __init kexec_setup(void) +{ + export_htab_values(); +} diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 3bedb532aed9..624a983a9676 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,14 +27,6 @@ .text - .align 5 -_GLOBAL(__delay) - cmpwi 0,r3,0 - mtctr r3 - beqlr -1: bdnz 1b - blr - /* * This returns the high 64 bits of the product of two 64-bit numbers. */ @@ -519,7 +511,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) * * flush_icache_range(unsigned long start, unsigned long stop) */ -_GLOBAL(flush_icache_range) +_GLOBAL(__flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) @@ -607,27 +599,6 @@ _GLOBAL(invalidate_dcache_range) sync /* wait for dcbi's to get to ram */ blr -#ifdef CONFIG_NOT_COHERENT_CACHE -/* - * 40x cores have 8K or 16K dcache and 32 byte line size. - * 44x has a 32K dcache and 32 byte line size. - * 8xx has 1, 2, 4, 8K variants. - * For now, cover the worst case of the 44x. - * Must be called with external interrupts disabled. - */ -#define CACHE_NWAYS 64 -#define CACHE_NLINES 16 - -_GLOBAL(flush_dcache_all) - li r4, (2 * CACHE_NWAYS * CACHE_NLINES) - mtctr r4 - lis r5, KERNELBASE@h -1: lwz r3, 0(r5) /* Load one word from every line */ - addi r5, r5, L1_CACHE_BYTES - bdnz 1b - blr -#endif /* CONFIG_NOT_COHERENT_CACHE */ - /* * Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index ae1433da09b2..ae48a002f81a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -89,12 +89,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_IRQ_event) +_GLOBAL(call___do_IRQ) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r6) - mr r1,r6 - bl .handle_IRQ_event + stdu r1,THREAD_SIZE-112(r5) + mr r1,r5 + bl .__do_IRQ ld r1,0(r1) ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c new file mode 100644 index 000000000000..928b8581fcb0 --- /dev/null +++ b/arch/powerpc/kernel/module_64.c @@ -0,0 +1,455 @@ +/* Kernel module help for PPC64. + Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/elf.h> +#include <linux/moduleloader.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <asm/module.h> +#include <asm/uaccess.h> + +/* FIXME: We don't do .init separately. To do this, we'd need to have + a separate r2 value in the init and core section, and stub between + them, too. + + Using a magic allocator which places modules within 32MB solves + this, and makes other things simpler. Anton? + --RR. */ +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +/* There's actually a third entry here, but it's unused */ +struct ppc64_opd_entry +{ + unsigned long funcaddr; + unsigned long r2; +}; + +/* Like PPC32, we need little trampolines to do > 24-bit jumps (into + the kernel itself). But on PPC64, these need to be used for every + jump, actually, to reset r2 (TOC+0x8000). */ +struct ppc64_stub_entry +{ + /* 28 byte jump instruction sequence (7 instructions) */ + unsigned char jump[28]; + unsigned char unused[4]; + /* Data for the above code */ + struct ppc64_opd_entry opd; +}; + +/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external) + function which may be more than 24-bits away. We could simply + patch the new r2 value and function pointer into the stub, but it's + significantly shorter to put these values at the end of the stub + code, and patch the stub address (32-bits relative to the TOC ptr, + r2) into the stub. */ +static struct ppc64_stub_entry ppc64_stub = +{ .jump = { + 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */ + 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */ + /* Save current r2 value in magic place on the stack. */ + 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */ + 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */ + 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */ + 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */ + 0x4e, 0x80, 0x04, 0x20 /* bctr */ +} }; + +/* Count how many different 24-bit relocations (different symbol, + different addend) */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, j, ret = 0; + + /* FIXME: Only count external ones --RR */ + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + /* Only count 24-bit relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24) + continue; + for (j = 0; j < i; j++) { + /* If this addend appeared before, it's + already been counted */ + if (rela[i].r_info == rela[j].r_info + && rela[i].r_addend == rela[j].r_addend) + break; + } + if (j == i) ret++; + } + return ret; +} + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* Get size of potential trampolines required. */ +static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs) +{ + /* One extra reloc so it's always 0-funcaddr terminated */ + unsigned long relocs = 1; + unsigned i; + + /* Every relocated section... */ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %lu\n", + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela)); + } + } + + DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + return relocs * sizeof(struct ppc64_stub_entry); +} + +static void dedotify_versions(struct modversion_info *vers, + unsigned long size) +{ + struct modversion_info *end; + + for (end = (void *)vers + size; vers < end; vers++) + if (vers->name[0] == '.') + memmove(vers->name, vers->name+1, strlen(vers->name)); +} + +/* Undefined symbols which refer to .funcname, hack to funcname */ +static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) +{ + unsigned int i; + + for (i = 1; i < numsyms; i++) { + if (syms[i].st_shndx == SHN_UNDEF) { + char *name = strtab + syms[i].st_name; + if (name[0] == '.') + memmove(name, name+1, strlen(name)); + } + } +} + +int module_frob_arch_sections(Elf64_Ehdr *hdr, + Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .toc and .stubs sections, symtab and strtab */ + for (i = 1; i < hdr->e_shnum; i++) { + char *p; + if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) + me->arch.stubs_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) + me->arch.toc_section = i; + else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + dedotify_versions((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); + + /* We don't handle .init for the moment: rename to _init */ + while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) + p[0] = '_'; + + if (sechdrs[i].sh_type == SHT_SYMTAB) + dedotify((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf64_Sym), + (void *)hdr + + sechdrs[sechdrs[i].sh_link].sh_offset); + } + if (!me->arch.stubs_section || !me->arch.toc_section) { + printk("%s: doesn't contain .toc or .stubs.\n", me->name); + return -ENOEXEC; + } + + /* Override the stubs size */ + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + return 0; +} + +int apply_relocate(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name); + return -ENOEXEC; +} + +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this + gives the value maximum span in an instruction which uses a signed + offset) */ +static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +{ + return sechdrs[me->arch.toc_section].sh_addr + 0x8000; +} + +/* Both low and high 16 bits are added as SIGNED additions, so if low + 16 bits has high bit set, high 16 bits must be adjusted. These + macros do that (stolen from binutils). */ +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) + +/* Patch stub to reference function and correct r2 value. */ +static inline int create_stub(Elf64_Shdr *sechdrs, + struct ppc64_stub_entry *entry, + struct ppc64_opd_entry *opd, + struct module *me) +{ + Elf64_Half *loc1, *loc2; + long reladdr; + + *entry = ppc64_stub; + + loc1 = (Elf64_Half *)&entry->jump[2]; + loc2 = (Elf64_Half *)&entry->jump[6]; + + /* Stub uses address relative to r2. */ + reladdr = (unsigned long)entry - my_r2(sechdrs, me); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + printk("%s: Address %p of stub out of range of %p.\n", + me->name, (void *)reladdr, (void *)my_r2); + return 0; + } + DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + + *loc1 = PPC_HA(reladdr); + *loc2 = PPC_LO(reladdr); + entry->opd.funcaddr = opd->funcaddr; + entry->opd.r2 = opd->r2; + return 1; +} + +/* Create stub to jump to function described in this OPD: we need the + stub to set up the TOC ptr (r2) for the function. */ +static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, + unsigned long opdaddr, + struct module *me) +{ + struct ppc64_stub_entry *stubs; + struct ppc64_opd_entry *opd = (void *)opdaddr; + unsigned int i, num_stubs; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs); + + /* Find this stub, or if that fails, the next avail. entry */ + stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stubs[i].opd.funcaddr; i++) { + BUG_ON(i >= num_stubs); + + if (stubs[i].opd.funcaddr == opd->funcaddr) + return (unsigned long)&stubs[i]; + } + + if (!create_stub(sechdrs, &stubs[i], opd, me)) + return 0; + + return (unsigned long)&stubs[i]; +} + +/* We expect a noop next: if it is, replace it with instruction to + restore r2. */ +static int restore_r2(u32 *instruction, struct module *me) +{ + if (*instruction != 0x60000000) { + printk("%s: Expect noop after relocate, got %08x\n", + me->name, *instruction); + return 0; + } + *instruction = 0xe8410028; /* ld r2,40(r1) */ + return 1; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + unsigned long *location; + unsigned long value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + location, (long)ELF64_R_TYPE(rela[i].r_info), + strtab + sym->st_name, (unsigned long)sym->st_value, + (long)rela[i].r_addend); + + /* `Everything is relative'. */ + value = sym->st_value + rela[i].r_addend; + + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_PPC64_ADDR32: + /* Simply set it */ + *(u32 *)location = value; + break; + + case R_PPC64_ADDR64: + /* Simply set it */ + *(unsigned long *)location = value; + break; + + case R_PPC64_TOC: + *(unsigned long *)location = my_r2(sechdrs, me); + break; + + case R_PPC64_TOC16: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + + case R_PPC64_TOC16_DS: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if ((value & 3) != 0 || value + 0x8000 > 0xffff) { + printk("%s: bad TOC16_DS relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xfffc) + | (value & 0xfffc); + break; + + case R_PPC_REL24: + /* FIXME: Handle weak symbols here --RR */ + if (sym->st_shndx == SHN_UNDEF) { + /* External: go via stub */ + value = stub_for_addr(sechdrs, value, me); + if (!value) + return -ENOENT; + if (!restore_r2((u32 *)location + 1, me)) + return -ENOEXEC; + } + + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ + printk("%s: REL24 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + + /* Only replace bits 2 through 26 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | (value & 0x03fffffc); + break; + + default: + printk("%s: Unknown ADD relocation: %lu\n", + me->name, + (unsigned long)ELF64_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } + + return 0; +} + +LIST_HEAD(module_bug_list); + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *me) +{ + char *secstrings; + unsigned int i; + + me->arch.bug_table = NULL; + me->arch.num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + me->arch.bug_table = (void *) sechdrs[i].sh_addr; + me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&me->arch.bug_list, &module_bug_list); + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ + list_del(&mod->arch.bug_list); +} + +struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct mod_arch_specific *mod; + unsigned int i; + struct bug_entry *bug; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + bug = mod->bug_table; + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c new file mode 100644 index 000000000000..c0fcd29918ce --- /dev/null +++ b/arch/powerpc/kernel/nvram_64.c @@ -0,0 +1,742 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + * + * TODO: Split the /dev/nvram part (that one can use + * drivers/char/generic_nvram.c) from the arch & partition + * parsing code. + */ + +#include <linux/module.h> + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/fcntl.h> +#include <linux/nvram.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include <asm/nvram.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/machdep.h> + +#undef DEBUG_NVRAM + +static int nvram_scan_partitions(void); +static int nvram_setup_partition(void); +static int nvram_create_os_partition(void); +static int nvram_remove_os_partition(void); + +static struct nvram_partition * nvram_part; +static long nvram_error_log_index = -1; +static long nvram_error_log_size = 0; + +int no_logging = 1; /* Until we initialize everything, + * make sure we don't try logging + * anything */ + +extern volatile int error_log_cnt; + +struct err_log_info { + int error_type; + unsigned int seq_num; +}; + +static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) +{ + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + switch (origin) { + case 1: + offset += file->f_pos; + break; + case 2: + offset += size; + break; + } + if (offset < 0) + return -EINVAL; + file->f_pos = offset; + return file->f_pos; +} + + +static ssize_t dev_nvram_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t len; + char *tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + if (*ppos >= size) + return 0; + if (count > size) + count = size; + + tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); + if (!tmp_buffer) { + printk(KERN_ERR "dev_read_nvram: kmalloc failed\n"); + return -ENOMEM; + } + + len = ppc_md.nvram_read(tmp_buffer, count, ppos); + if ((long)len <= 0) { + kfree(tmp_buffer); + return len; + } + + if (copy_to_user(buf, tmp_buffer, len)) { + kfree(tmp_buffer); + return -EFAULT; + } + + kfree(tmp_buffer); + return len; + +} + +static ssize_t dev_nvram_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t len; + char * tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + if (*ppos >= size) + return 0; + if (count > size) + count = size; + + tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); + if (!tmp_buffer) { + printk(KERN_ERR "dev_nvram_write: kmalloc failed\n"); + return -ENOMEM; + } + + if (copy_from_user(tmp_buffer, buf, count)) { + kfree(tmp_buffer); + return -EFAULT; + } + + len = ppc_md.nvram_write(tmp_buffer, count, ppos); + if ((long)len <= 0) { + kfree(tmp_buffer); + return len; + } + + kfree(tmp_buffer); + return len; +} + +static int dev_nvram_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + switch(cmd) { +#ifdef CONFIG_PPC_PMAC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + case IOC_NVRAM_GET_OFFSET: { + int part, offset; + + if (_machine != PLATFORM_POWERMAC) + return -EINVAL; + if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (offset < 0) + return offset; + if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0) + return -EFAULT; + return 0; + } +#endif /* CONFIG_PPC_PMAC */ + } + return -EINVAL; +} + +struct file_operations nvram_fops = { + .owner = THIS_MODULE, + .llseek = dev_nvram_llseek, + .read = dev_nvram_read, + .write = dev_nvram_write, + .ioctl = dev_nvram_ioctl, +}; + +static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", + &nvram_fops +}; + + +#ifdef DEBUG_NVRAM +static void nvram_print_partitions(char * label) +{ + struct list_head * p; + struct nvram_partition * tmp_part; + + printk(KERN_WARNING "--------%s---------\n", label); + printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); + list_for_each(p, &nvram_part->partition) { + tmp_part = list_entry(p, struct nvram_partition, partition); + printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n", + tmp_part->index, tmp_part->header.signature, + tmp_part->header.checksum, tmp_part->header.length, + tmp_part->header.name); + } +} +#endif + + +static int nvram_write_header(struct nvram_partition * part) +{ + loff_t tmp_index; + int rc; + + tmp_index = part->index; + rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); + + return rc; +} + + +static unsigned char nvram_checksum(struct nvram_header *p) +{ + unsigned int c_sum, c_sum2; + unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ + c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5]; + + /* The sum may have spilled into the 3rd byte. Fold it back. */ + c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff; + /* The sum cannot exceed 2 bytes. Fold it into a checksum */ + c_sum2 = (c_sum >> 8) + (c_sum << 8); + c_sum = ((c_sum + c_sum2) >> 8) & 0xff; + return c_sum; +} + + +/* + * Find an nvram partition, sig can be 0 for any + * partition or name can be NULL for any name, else + * tries to match both + */ +struct nvram_partition *nvram_find_partition(int sig, const char *name) +{ + struct nvram_partition * part; + struct list_head * p; + + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + + if (sig && part->header.signature != sig) + continue; + if (name && 0 != strncmp(name, part->header.name, 12)) + continue; + return part; + } + return NULL; +} +EXPORT_SYMBOL(nvram_find_partition); + + +static int nvram_remove_os_partition(void) +{ + struct list_head *i; + struct list_head *j; + struct nvram_partition * part; + struct nvram_partition * cur_part; + int rc; + + list_for_each(i, &nvram_part->partition) { + part = list_entry(i, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_OS) + continue; + + /* Make os partition a free partition */ + part->header.signature = NVRAM_SIG_FREE; + sprintf(part->header.name, "wwwwwwwwwwww"); + part->header.checksum = nvram_checksum(&part->header); + + /* Merge contiguous free partitions backwards */ + list_for_each_prev(j, &part->partition) { + cur_part = list_entry(j, struct nvram_partition, partition); + if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { + break; + } + + part->header.length += cur_part->header.length; + part->header.checksum = nvram_checksum(&part->header); + part->index = cur_part->index; + + list_del(&cur_part->partition); + kfree(cur_part); + j = &part->partition; /* fixup our loop */ + } + + /* Merge contiguous free partitions forwards */ + list_for_each(j, &part->partition) { + cur_part = list_entry(j, struct nvram_partition, partition); + if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { + break; + } + + part->header.length += cur_part->header.length; + part->header.checksum = nvram_checksum(&part->header); + + list_del(&cur_part->partition); + kfree(cur_part); + j = &part->partition; /* fixup our loop */ + } + + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc); + return rc; + } + + } + + return 0; +} + +/* nvram_create_os_partition + * + * Create a OS linux partition to buffer error logs. + * Will create a partition starting at the first free + * space found if space has enough room. + */ +static int nvram_create_os_partition(void) +{ + struct nvram_partition *part; + struct nvram_partition *new_part; + struct nvram_partition *free_part = NULL; + int seq_init[2] = { 0, 0 }; + loff_t tmp_index; + long size = 0; + int rc; + + /* Find a free partition that will give us the maximum needed size + If can't find one that will give us the minimum size needed */ + list_for_each_entry(part, &nvram_part->partition, partition) { + if (part->header.signature != NVRAM_SIG_FREE) + continue; + + if (part->header.length >= NVRAM_MAX_REQ) { + size = NVRAM_MAX_REQ; + free_part = part; + break; + } + if (!size && part->header.length >= NVRAM_MIN_REQ) { + size = NVRAM_MIN_REQ; + free_part = part; + } + } + if (!size) + return -ENOSPC; + + /* Create our OS partition */ + new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); + if (!new_part) { + printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n"); + return -ENOMEM; + } + + new_part->index = free_part->index; + new_part->header.signature = NVRAM_SIG_OS; + new_part->header.length = size; + strcpy(new_part->header.name, "ppc64,linux"); + new_part->header.checksum = nvram_checksum(&new_part->header); + + rc = nvram_write_header(new_part); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \ + failed (%d)\n", rc); + return rc; + } + + /* make sure and initialize to zero the sequence number and the error + type logged */ + tmp_index = new_part->index + NVRAM_HEADER_LEN; + rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write " + "failed (%d)\n", rc); + return rc; + } + + nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN; + nvram_error_log_size = ((part->header.length - 1) * + NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); + + list_add_tail(&new_part->partition, &free_part->partition); + + if (free_part->header.length <= size) { + list_del(&free_part->partition); + kfree(free_part); + return 0; + } + + /* Adjust the partition we stole the space from */ + free_part->index += size * NVRAM_BLOCK_LEN; + free_part->header.length -= size; + free_part->header.checksum = nvram_checksum(&free_part->header); + + rc = nvram_write_header(free_part); + if (rc <= 0) { + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + + return 0; +} + + +/* nvram_setup_partition + * + * This will setup the partition we need for buffering the + * error logs and cleanup partitions if needed. + * + * The general strategy is the following: + * 1.) If there is ppc64,linux partition large enough then use it. + * 2.) If there is not a ppc64,linux partition large enough, search + * for a free partition that is large enough. + * 3.) If there is not a free partition large enough remove + * _all_ OS partitions and consolidate the space. + * 4.) Will first try getting a chunk that will satisfy the maximum + * error log size (NVRAM_MAX_REQ). + * 5.) If the max chunk cannot be allocated then try finding a chunk + * that will satisfy the minum needed (NVRAM_MIN_REQ). + */ +static int nvram_setup_partition(void) +{ + struct list_head * p; + struct nvram_partition * part; + int rc; + + /* For now, we don't do any of this on pmac, until I + * have figured out if it's worth killing some unused stuffs + * in our nvram, as Apple defined partitions use pretty much + * all of the space + */ + if (_machine == PLATFORM_POWERMAC) + return -ENOSPC; + + /* see if we have an OS partition that meets our needs. + will try getting the max we need. If not we'll delete + partitions and try again. */ + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_OS) + continue; + + if (strcmp(part->header.name, "ppc64,linux")) + continue; + + if (part->header.length >= NVRAM_MIN_REQ) { + /* found our partition */ + nvram_error_log_index = part->index + NVRAM_HEADER_LEN; + nvram_error_log_size = ((part->header.length - 1) * + NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); + return 0; + } + } + + /* try creating a partition with the free space we have */ + rc = nvram_create_os_partition(); + if (!rc) { + return 0; + } + + /* need to free up some space */ + rc = nvram_remove_os_partition(); + if (rc) { + return rc; + } + + /* create a partition in this new space */ + rc = nvram_create_os_partition(); + if (rc) { + printk(KERN_ERR "nvram_create_os_partition: Could not find a " + "NVRAM partition large enough\n"); + return rc; + } + + return 0; +} + + +static int nvram_scan_partitions(void) +{ + loff_t cur_index = 0; + struct nvram_header phead; + struct nvram_partition * tmp_part; + unsigned char c_sum; + char * header; + int total_size; + int err; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + total_size = ppc_md.nvram_size(); + + header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL); + if (!header) { + printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n"); + return -ENOMEM; + } + + while (cur_index < total_size) { + + err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index); + if (err != NVRAM_HEADER_LEN) { + printk(KERN_ERR "nvram_scan_partitions: Error parsing " + "nvram partitions\n"); + goto out; + } + + cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */ + + memcpy(&phead, header, NVRAM_HEADER_LEN); + + err = 0; + c_sum = nvram_checksum(&phead); + if (c_sum != phead.checksum) { + printk(KERN_WARNING "WARNING: nvram partition checksum" + " was %02x, should be %02x!\n", + phead.checksum, c_sum); + printk(KERN_WARNING "Terminating nvram partition scan\n"); + goto out; + } + if (!phead.length) { + printk(KERN_WARNING "WARNING: nvram corruption " + "detected: 0-length partition\n"); + goto out; + } + tmp_part = (struct nvram_partition *) + kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + err = -ENOMEM; + if (!tmp_part) { + printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); + goto out; + } + + memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); + tmp_part->index = cur_index; + list_add_tail(&tmp_part->partition, &nvram_part->partition); + + cur_index += phead.length * NVRAM_BLOCK_LEN; + } + err = 0; + + out: + kfree(header); + return err; +} + +static int __init nvram_init(void) +{ + int error; + int rc; + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) + return -ENODEV; + + rc = misc_register(&nvram_dev); + if (rc != 0) { + printk(KERN_ERR "nvram_init: failed to register device\n"); + return rc; + } + + /* initialize our anchor for the nvram partition list */ + nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!nvram_part) { + printk(KERN_ERR "nvram_init: Failed kmalloc\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&nvram_part->partition); + + /* Get all the NVRAM partitions */ + error = nvram_scan_partitions(); + if (error) { + printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); + return error; + } + + if(nvram_setup_partition()) + printk(KERN_WARNING "nvram_init: Could not find nvram partition" + " for nvram buffered error logging.\n"); + +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + + return rc; +} + +void __exit nvram_cleanup(void) +{ + misc_deregister( &nvram_dev ); +} + + +#ifdef CONFIG_PPC_PSERIES + +/* nvram_write_error_log + * + * We need to buffer the error logs into nvram to ensure that we have + * the failure information to decode. If we have a severe error there + * is no way to guarantee that the OS or the machine is in a state to + * get back to user land and write the error to disk. For example if + * the SCSI device driver causes a Machine Check by writing to a bad + * IO address, there is no way of guaranteeing that the device driver + * is in any state that is would also be able to write the error data + * captured to disk, thus we buffer it in NVRAM for analysis on the + * next boot. + * + * In NVRAM the partition containing the error log buffer will looks like: + * Header (in bytes): + * +-----------+----------+--------+------------+------------------+ + * | signature | checksum | length | name | data | + * |0 |1 |2 3|4 15|16 length-1| + * +-----------+----------+--------+------------+------------------+ + * + * The 'data' section would look like (in bytes): + * +--------------+------------+-----------------------------------+ + * | event_logged | sequence # | error log | + * |0 3|4 7|8 nvram_error_log_size-1| + * +--------------+------------+-----------------------------------+ + * + * event_logged: 0 if event has not been logged to syslog, 1 if it has + * sequence #: The unique sequence # for each event. (until it wraps) + * error log: The error log from event_scan + */ +int nvram_write_error_log(char * buff, int length, unsigned int err_type) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (no_logging) { + return -EPERM; + } + + if (nvram_error_log_index == -1) { + return -ESPIPE; + } + + if (length > nvram_error_log_size) { + length = nvram_error_log_size; + } + + info.error_type = err_type; + info.seq_num = error_log_cnt; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + rc = ppc_md.nvram_write(buff, length, &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + return 0; +} + +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char * buff, int length, unsigned int * err_type) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (nvram_error_log_index == -1) + return -1; + + if (length > nvram_error_log_size) + length = nvram_error_log_size; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + return rc; + } + + rc = ppc_md.nvram_read(buff, length, &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + return rc; + } + + error_log_cnt = info.seq_num; + *err_type = info.error_type; + + return 0; +} + +/* This doesn't actually zero anything, but it sets the event_logged + * word to tell that this event is safely in syslog. + */ +int nvram_clear_error_log(void) +{ + loff_t tmp_index; + int clear_word = ERR_FLAG_ALREADY_LOGGED; + int rc; + + tmp_index = nvram_error_log_index; + + rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + + return 0; +} + +#endif /* CONFIG_PPC_PSERIES */ + +module_init(nvram_init); +module_exit(nvram_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c new file mode 100644 index 000000000000..a7b68f911eb1 --- /dev/null +++ b/arch/powerpc/kernel/paca.c @@ -0,0 +1,135 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/module.h> + +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/page.h> +#include <asm/lppaca.h> +#include <asm/iseries/it_lp_queue.h> +#include <asm/paca.h> + + +/* This symbol is provided by the linker - let it fill in the paca + * field correctly */ +extern unsigned long __toc_start; + +/* The Paca is an array with one entry per processor. Each contains an + * lppaca, which contains the information shared between the + * hypervisor and Linux. Each also contains an ItLpRegSave area which + * is used by the hypervisor to save registers. + * On systems with hardware multi-threading, there are two threads + * per processor. The Paca array must contain an entry for each thread. + * The VPD Areas will give a max logical processors = 2 * max physical + * processors. The processor VPD array needs one entry per physical + * processor (not thread). + */ +#define PACA_INIT_COMMON(number, start, asrr, asrv) \ + .lock_token = 0x8000, \ + .paca_index = (number), /* Paca Index */ \ + .default_decr = 0x00ff0000, /* Initial Decr */ \ + .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ + .stab_real = (asrr), /* Real pointer to segment table */ \ + .stab_addr = (asrv), /* Virt pointer to segment table */ \ + .cpu_start = (start), /* Processor start */ \ + .hw_cpu_id = 0xffff, \ + .lppaca = { \ + .desc = 0xd397d781, /* "LpPa" */ \ + .size = sizeof(struct lppaca), \ + .dyn_proc_status = 2, \ + .decr_val = 0x00ff0000, \ + .fpregs_in_use = 1, \ + .end_of_quantum = 0xfffffffffffffffful, \ + .slb_count = 64, \ + .vmxregs_in_use = 0, \ + }, \ + +#ifdef CONFIG_PPC_ISERIES +#define PACA_INIT_ISERIES(number) \ + .lppaca_ptr = &paca[number].lppaca, \ + .reg_save_ptr = &paca[number].reg_save, \ + .reg_save = { \ + .xDesc = 0xd397d9e2, /* "LpRS" */ \ + .xSize = sizeof(struct ItLpRegSave) \ + } + +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ + PACA_INIT_ISERIES(number) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ + PACA_INIT_ISERIES(number) \ +} + +#else +#define PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 0, 0, 0) \ +} + +#define BOOTCPU_PACA_INIT(number) \ +{ \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ +} +#endif + +struct paca_struct paca[] = { + BOOTCPU_PACA_INIT(0), +#if NR_CPUS > 1 + PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), +#if NR_CPUS > 4 + PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), +#if NR_CPUS > 8 + PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), + PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), + PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), + PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), + PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), + PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), +#if NR_CPUS > 32 + PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), + PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), + PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), + PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), + PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), + PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), + PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), + PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), +#if NR_CPUS > 64 + PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), + PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), + PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), + PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), + PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), + PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), + PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), + PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), + PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), + PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), + PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), + PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), + PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), + PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), + PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123), + PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), +#endif +#endif +#endif +#endif +#endif +}; +EXPORT_SYMBOL(paca); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c new file mode 100644 index 000000000000..8b6008ab217d --- /dev/null +++ b/arch/powerpc/kernel/pci_64.c @@ -0,0 +1,1365 @@ +/* + * Port for PPC64 David Engebretsen, IBM Corp. + * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. + * + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * Rework, based on alpha PCI code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/list.h> +#include <linux/syscalls.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/byteorder.h> +#include <asm/irq.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +unsigned long pci_probe_only = 1; +int pci_assign_all_buses = 0; + +/* + * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch + * devices we don't have access to. + */ +unsigned long io_page_mask; + +EXPORT_SYMBOL(io_page_mask); + +#ifdef CONFIG_PPC_MULTIPLATFORM +static void fixup_resource(struct resource *res, struct pci_dev *dev); +static void do_bus_setup(struct pci_bus *bus); +#endif + +/* pci_io_base -- the base address from which io bars are offsets. + * This is the lowest I/O base address (so bar values are always positive), + * and it *must* be the start of ISA space if an ISA bus exists because + * ISA drivers use hard coded offsets. If no ISA bus exists a dummy + * page is mapped and isa_io_limit prevents access to it. + */ +unsigned long isa_io_base; /* NULL if no ISA bus */ +EXPORT_SYMBOL(isa_io_base); +unsigned long pci_io_base; +EXPORT_SYMBOL(pci_io_base); + +void iSeries_pcibios_init(void); + +LIST_HEAD(hose_list); + +struct dma_mapping_ops pci_dma_ops; +EXPORT_SYMBOL(pci_dma_ops); + +int global_phb_number; /* Global phb counter */ + +/* Cached ISA bridge dev. */ +struct pci_dev *ppc64_isabridge_dev = NULL; + +static void fixup_broken_pcnet32(struct pci_dev* dev) +{ + if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { + dev->vendor = PCI_VENDOR_ID_AMD; + pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32); + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + unsigned long offset = 0; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + + if (res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + unsigned long offset = 0; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + + if (res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +#endif + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void pcibios_align_resource(void *data, struct resource *res, + unsigned long size, unsigned long align) +{ + struct pci_dev *dev = data; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long start = res->start; + unsigned long alignto; + + if (res->flags & IORESOURCE_IO) { + unsigned long offset = (unsigned long)hose->io_base_virt - + pci_io_base; + /* Make sure we start at our min on all hoses */ + if (start - offset < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + offset; + + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + + } else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->pci_mem_offset; + + /* Align to multiple of size of minimum base. */ + alignto = max(0x1000UL, align); + start = ALIGN(start, alignto); + } + + res->start = start; +} + +static DEFINE_SPINLOCK(hose_spinlock); + +/* + * pci_controller(phb) initialized common variables. + */ +static void __devinit pci_setup_pci_controller(struct pci_controller *hose) +{ + memset(hose, 0, sizeof(struct pci_controller)); + + spin_lock(&hose_spinlock); + hose->global_number = global_phb_number++; + list_add_tail(&hose->list_node, &hose_list); + spin_unlock(&hose_spinlock); +} + +static void add_linux_pci_domain(struct device_node *dev, + struct pci_controller *phb) +{ + struct property *of_prop; + unsigned int size; + + of_prop = (struct property *) + get_property(dev, "linux,pci-domain", &size); + if (of_prop != NULL) + return; + WARN_ON(of_prop && size < sizeof(int)); + if (of_prop && size < sizeof(int)) + of_prop = NULL; + size = sizeof(struct property) + sizeof(int); + if (of_prop == NULL) { + if (mem_init_done) + of_prop = kmalloc(size, GFP_KERNEL); + else + of_prop = alloc_bootmem(size); + } + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(int); + of_prop->value = (unsigned char *)&of_prop[1]; + *((int *)of_prop->value) = phb->global_number; + prom_add_property(dev, of_prop); +} + +struct pci_controller * pcibios_alloc_controller(struct device_node *dev) +{ + struct pci_controller *phb; + + if (mem_init_done) + phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + else + phb = alloc_bootmem(sizeof (struct pci_controller)); + if (phb == NULL) + return NULL; + pci_setup_pci_controller(phb); + phb->arch_data = dev; + phb->is_dynamic = mem_init_done; + if (dev) + add_linux_pci_domain(dev, phb); + return phb; +} + +void pcibios_free_controller(struct pci_controller *phb) +{ + if (phb->arch_data) { + struct device_node *np = phb->arch_data; + int *domain = (int *)get_property(np, + "linux,pci-domain", NULL); + if (domain) + *domain = -1; + } + if (phb->is_dynamic) + kfree(phb); +} + +static void __init pcibios_claim_one_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + + list_for_each_entry(dev, &b->devices, bus_list) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + pci_claim_resource(dev, i); + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +#ifndef CONFIG_PPC_ISERIES +static void __init pcibios_claim_of_setup(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + pcibios_claim_one_bus(b); +} +#endif + +#ifdef CONFIG_PPC_MULTIPLATFORM +static u32 get_int_prop(struct device_node *np, const char *name, u32 def) +{ + u32 *prop; + int len; + + prop = (u32 *) get_property(np, name, &len); + if (prop && len >= 4) + return *prop; + return def; +} + +static unsigned int pci_parse_of_flags(u32 addr0) +{ + unsigned int flags = 0; + + if (addr0 & 0x02000000) { + flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; + flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; + if (addr0 & 0x40000000) + flags |= IORESOURCE_PREFETCH + | PCI_BASE_ADDRESS_MEM_PREFETCH; + } else if (addr0 & 0x01000000) + flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; + return flags; +} + +#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1]) + +static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) +{ + u64 base, size; + unsigned int flags; + struct resource *res; + u32 *addrs, i; + int proplen; + + addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); + if (!addrs) + return; + for (; proplen >= 20; proplen -= 20, addrs += 5) { + flags = pci_parse_of_flags(addrs[0]); + if (!flags) + continue; + base = GET_64BIT(addrs, 1); + size = GET_64BIT(addrs, 3); + if (!size) + continue; + i = addrs[0] & 0xff; + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { + res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; + } else if (i == dev->rom_base_reg) { + res = &dev->resource[PCI_ROM_RESOURCE]; + flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + } else { + printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); + continue; + } + res->start = base; + res->end = base + size - 1; + res->flags = flags; + res->name = pci_name(dev); + fixup_resource(res, dev); + } +} + +struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn) +{ + struct pci_dev *dev; + const char *type; + + dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL); + if (!dev) + return NULL; + type = get_property(node, "device_type", NULL); + if (type == NULL) + type = ""; + + memset(dev, 0, sizeof(struct pci_dev)); + dev->bus = bus; + dev->sysdata = node; + dev->dev.parent = bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->devfn = devfn; + dev->multifunction = 0; /* maybe a lie? */ + + dev->vendor = get_int_prop(node, "vendor-id", 0xffff); + dev->device = get_int_prop(node, "device-id", 0xffff); + dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); + dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); + + dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/ + + sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), + dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dev->class = get_int_prop(node, "class-code", 0); + + dev->current_state = 4; /* unknown power state */ + + if (!strcmp(type, "pci")) { + /* a PCI-PCI bridge */ + dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; + dev->rom_base_reg = PCI_ROM_ADDRESS1; + } else if (!strcmp(type, "cardbus")) { + dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; + } else { + dev->hdr_type = PCI_HEADER_TYPE_NORMAL; + dev->rom_base_reg = PCI_ROM_ADDRESS; + dev->irq = NO_IRQ; + if (node->n_intrs > 0) { + dev->irq = node->intrs[0].line; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } + } + + pci_parse_of_addrs(node, dev); + + pci_device_add(dev, bus); + + /* XXX pci_scan_msi_device(dev); */ + + return dev; +} +EXPORT_SYMBOL(of_create_pci_dev); + +void __devinit of_scan_bus(struct device_node *node, + struct pci_bus *bus) +{ + struct device_node *child = NULL; + u32 *reg; + int reglen, devfn; + struct pci_dev *dev; + + while ((child = of_get_next_child(node, child)) != NULL) { + reg = (u32 *) get_property(child, "reg", ®len); + if (reg == NULL || reglen < 20) + continue; + devfn = (reg[0] >> 8) & 0xff; + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(child, bus, devfn); + if (!dev) + continue; + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + of_scan_pci_bridge(child, dev); + } + + do_bus_setup(bus); +} +EXPORT_SYMBOL(of_scan_bus); + +void __devinit of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev) +{ + struct pci_bus *bus; + u32 *busrange, *ranges; + int len, i, mode; + struct resource *res; + unsigned int flags; + u64 size; + + /* parse bus-range property */ + busrange = (u32 *) get_property(node, "bus-range", &len); + if (busrange == NULL || len != 8) { + printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n", + node->full_name); + return; + } + ranges = (u32 *) get_property(node, "ranges", &len); + if (ranges == NULL) { + printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n", + node->full_name); + return; + } + + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } + + bus->primary = dev->bus->number; + bus->subordinate = busrange[1]; + bus->bridge_ctl = 0; + bus->sysdata = node; + + /* parse ranges property */ + /* PCI #address-cells == 3 and #size-cells == 2 always */ + res = &dev->resource[PCI_BRIDGE_RESOURCES]; + for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { + res->flags = 0; + bus->resource[i] = res; + ++res; + } + i = 1; + for (; len >= 32; len -= 32, ranges += 8) { + flags = pci_parse_of_flags(ranges[0]); + size = GET_64BIT(ranges, 6); + if (flags == 0 || size == 0) + continue; + if (flags & IORESOURCE_IO) { + res = bus->resource[0]; + if (res->flags) { + printk(KERN_ERR "PCI: ignoring extra I/O range" + " for bridge %s\n", node->full_name); + continue; + } + } else { + if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { + printk(KERN_ERR "PCI: too many memory ranges" + " for bridge %s\n", node->full_name); + continue; + } + res = bus->resource[i]; + ++i; + } + res->start = GET_64BIT(ranges, 1); + res->end = res->start + size - 1; + res->flags = flags; + fixup_resource(res, dev); + } + sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), + bus->number); + + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + if (mode == PCI_PROBE_DEVTREE) + of_scan_bus(node, bus); + else if (mode == PCI_PROBE_NORMAL) + pci_scan_child_bus(bus); +} +EXPORT_SYMBOL(of_scan_pci_bridge); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +void __devinit scan_phb(struct pci_controller *hose) +{ + struct pci_bus *bus; + struct device_node *node = hose->arch_data; + int i, mode; + struct resource *res; + + bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node); + if (bus == NULL) { + printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", + hose->global_number); + return; + } + bus->secondary = hose->first_busno; + hose->bus = bus; + + bus->resource[0] = res = &hose->io_resource; + if (res->flags && request_resource(&ioport_resource, res)) + printk(KERN_ERR "Failed to request PCI IO region " + "on PCI domain %04x\n", hose->global_number); + + for (i = 0; i < 3; ++i) { + res = &hose->mem_resources[i]; + bus->resource[i+1] = res; + if (res->flags && request_resource(&iomem_resource, res)) + printk(KERN_ERR "Failed to request PCI memory region " + "on PCI domain %04x\n", hose->global_number); + } + + mode = PCI_PROBE_NORMAL; +#ifdef CONFIG_PPC_MULTIPLATFORM + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + if (mode == PCI_PROBE_DEVTREE) { + bus->subordinate = hose->last_busno; + of_scan_bus(node, bus); + } +#endif /* CONFIG_PPC_MULTIPLATFORM */ + if (mode == PCI_PROBE_NORMAL) + hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + pci_bus_add_devices(bus); +} + +static int __init pcibios_init(void) +{ + struct pci_controller *hose, *tmp; + + /* For now, override phys_mem_access_prot. If we need it, + * later, we may move that initialization to each ppc_md + */ + ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; + +#ifdef CONFIG_PPC_ISERIES + iSeries_pcibios_init(); +#endif + + printk("PCI: Probing PCI hardware\n"); + + /* Scan all of the recorded PCI controllers. */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + scan_phb(hose); + +#ifndef CONFIG_PPC_ISERIES + if (pci_probe_only) + pcibios_claim_of_setup(); + else + /* FIXME: `else' will be removed when + pci_assign_unassigned_resources() is able to work + correctly with [partially] allocated PCI tree. */ + pci_assign_unassigned_resources(); +#endif /* !CONFIG_PPC_ISERIES */ + + /* Call machine dependent final fixup */ + if (ppc_md.pcibios_fixup) + ppc_md.pcibios_fixup(); + + /* Cache the location of the ISA bridge (if we have one) */ + ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); + if (ppc64_isabridge_dev != NULL) + printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); + +#ifdef CONFIG_PPC_MULTIPLATFORM + /* map in PCI I/O space */ + phbs_remap_io(); +#endif + + printk("PCI: Probing PCI hardware done\n"); + + return 0; +} + +subsys_initcall(pcibios_init); + +char __init *pcibios_setup(char *str) +{ + return str; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, oldcmd; + int i; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + /* Only set up the requested stuff */ + if (!(mask & (1<<i))) + continue; + + if (res->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + + if (cmd != oldcmd) { + printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", + pci_name(dev), cmd); + /* Enable the appropriate bits in the PCI command register. */ + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * Return the domain number for this bus. + */ +int pci_domain_nr(struct pci_bus *bus) +{ +#ifdef CONFIG_PPC_ISERIES + return 0; +#else + struct pci_controller *hose = pci_bus_to_host(bus); + + return hose->global_number; +#endif +} + +EXPORT_SYMBOL(pci_domain_nr); + +/* Decide whether to display the domain number in /proc */ +int pci_proc_domain(struct pci_bus *bus) +{ +#ifdef CONFIG_PPC_ISERIES + return 0; +#else + struct pci_controller *hose = pci_bus_to_host(bus); + return hose->buid; +#endif +} + +/* + * Platform support for /proc/bus/pci/X/Y mmap()s, + * modelled on the sparc64 implementation by Dave Miller. + * -- paulus. + */ + +/* + * Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. + */ +static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, + unsigned long *offset, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return NULL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { + *offset += hose->pci_mem_offset; + res_bit = IORESOURCE_MEM; + } else { + io_offset = (unsigned long)hose->io_base_virt - pci_io_base; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + *offset += hose->io_base_phys - io_offset; + return rp; + } + + return NULL; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, + pgprot_t protection, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long prot = pgprot_val(protection); + + /* Write combine is always 0 on non-memory space mappings. On + * memory space, if the user didn't pass 1, we check for a + * "prefetchable" resource. This is a bit hackish, but we use + * this to workaround the inability of /sysfs to provide a write + * combine bit + */ + if (mmap_state != pci_mmap_mem) + write_combine = 0; + else if (write_combine == 0) { + if (rp->flags & IORESOURCE_PREFETCH) + write_combine = 1; + } + + /* XXX would be nice to have a way to ask for write-through */ + prot |= _PAGE_NO_CACHE; + if (write_combine) + prot &= ~_PAGE_GUARDED; + else + prot |= _PAGE_GUARDED; + + printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, + prot); + + return __pgprot(prot); +} + +/* + * This one is used by /dev/mem and fbdev who have no clue about the + * PCI device, it tries to find the PCI device first and calls the + * above routine + */ +pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long pfn, + unsigned long size, + pgprot_t protection) +{ + struct pci_dev *pdev = NULL; + struct resource *found = NULL; + unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; + int i; + + if (page_is_ram(pfn)) + return __pgprot(prot); + + prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; + + for_each_pci_dev(pdev) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + int flags = rp->flags; + + /* Active and same type? */ + if ((flags & IORESOURCE_MEM) == 0) + continue; + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || + offset > rp->end) + continue; + found = rp; + break; + } + if (found) + break; + } + if (found) { + if (found->flags & IORESOURCE_PREFETCH) + prot &= ~_PAGE_GUARDED; + pci_dev_put(pdev); + } + + DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); + + return __pgprot(prot); +} + + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + struct resource *rp; + int ret; + + rp = __pci_mmap_make_offset(dev, &offset, mmap_state); + if (rp == NULL) + return -EINVAL; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO; + vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, + vma->vm_page_prot, + mmap_state, write_combine); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +#ifdef CONFIG_PPC_MULTIPLATFORM +static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +void pcibios_add_platform_entries(struct pci_dev *pdev) +{ +#ifdef CONFIG_PPC_MULTIPLATFORM + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_MULTIPLATFORM */ +} + +#ifdef CONFIG_PPC_MULTIPLATFORM + +#define ISA_SPACE_MASK 0x1 +#define ISA_SPACE_IO 0x1 + +static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys, + void __iomem * phb_io_base_virt) +{ + struct isa_range *range; + unsigned long pci_addr; + unsigned int isa_addr; + unsigned int size; + int rlen = 0; + + range = (struct isa_range *) get_property(isa_node, "ranges", &rlen); + if (range == NULL || (rlen < sizeof(struct isa_range))) { + printk(KERN_ERR "no ISA ranges or unexpected isa range size," + "mapping 64k\n"); + __ioremap_explicit(phb_io_base_phys, + (unsigned long)phb_io_base_virt, + 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED); + return; + } + + /* From "ISA Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 1: an ISA address + * cells 2 - 4: a PCI address + * (size depending on dev->n_addr_cells) + * cell 5: the size of the range + */ + if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { + isa_addr = range->isa_addr.a_lo; + pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | + range->pci_addr.a_lo; + + /* Assume these are both zero */ + if ((pci_addr != 0) || (isa_addr != 0)) { + printk(KERN_ERR "unexpected isa to pci mapping: %s\n", + __FUNCTION__); + return; + } + + size = PAGE_ALIGN(range->size); + + __ioremap_explicit(phb_io_base_phys, + (unsigned long) phb_io_base_virt, + size, _PAGE_NO_CACHE | _PAGE_GUARDED); + } +} + +void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int prim) +{ + unsigned int *ranges, pci_space; + unsigned long size; + int rlen = 0; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + unsigned long pci_addr, cpu_phys_addr; + + np = na + 5; + + /* From "PCI Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (ranges == NULL) + return; + hose->io_base_phys = 0; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + pci_space = ranges[0]; + pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2]; + + cpu_phys_addr = ranges[3]; + if (na >= 2) + cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4]; + + size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4]; + ranges += np; + if (size == 0) + continue; + + /* Now consume following elements while they are contiguous */ + while (rlen >= np * sizeof(unsigned int)) { + unsigned long addr, phys; + + if (ranges[0] != pci_space) + break; + addr = ((unsigned long)ranges[1] << 32) | ranges[2]; + phys = ranges[3]; + if (na >= 2) + phys = (phys << 32) | ranges[4]; + if (addr != pci_addr + size || + phys != cpu_phys_addr + size) + break; + + size += ((unsigned long)ranges[na+3] << 32) + | ranges[na+4]; + ranges += np; + rlen -= np * sizeof(unsigned int); + } + + switch ((pci_space >> 24) & 0x3) { + case 1: /* I/O space */ + hose->io_base_phys = cpu_phys_addr; + hose->pci_io_size = size; + + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = pci_addr; + DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number, + res->start, res->start + size - 1); + break; + case 2: /* memory space */ + memno = 0; + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + + if (memno == 0) + hose->pci_mem_offset = cpu_phys_addr - pci_addr; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = cpu_phys_addr; + DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number, + res->start, res->start + size - 1); + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + } +} + +void __init pci_setup_phb_io(struct pci_controller *hose, int primary) +{ + unsigned long size = hose->pci_io_size; + unsigned long io_virt_offset; + struct resource *res; + struct device_node *isa_dn; + + hose->io_base_virt = reserve_phb_iospace(size); + DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", + hose->global_number, hose->io_base_phys, + (unsigned long) hose->io_base_virt); + + if (primary) { + pci_io_base = (unsigned long)hose->io_base_virt; + isa_dn = of_find_node_by_type(NULL, "isa"); + if (isa_dn) { + isa_io_base = pci_io_base; + pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, + hose->io_base_virt); + of_node_put(isa_dn); + /* Allow all IO */ + io_page_mask = -1; + } + } + + io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; + res = &hose->io_resource; + res->start += io_virt_offset; + res->end += io_virt_offset; +} + +void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, + int primary) +{ + unsigned long size = hose->pci_io_size; + unsigned long io_virt_offset; + struct resource *res; + + hose->io_base_virt = __ioremap(hose->io_base_phys, size, + _PAGE_NO_CACHE | _PAGE_GUARDED); + DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", + hose->global_number, hose->io_base_phys, + (unsigned long) hose->io_base_virt); + + if (primary) + pci_io_base = (unsigned long)hose->io_base_virt; + + io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; + res = &hose->io_resource; + res->start += io_virt_offset; + res->end += io_virt_offset; +} + + +static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, + unsigned long *start_virt, unsigned long *size) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pci_bus_region region; + struct resource *res; + + if (bus->self) { + res = bus->resource[0]; + pcibios_resource_to_bus(bus->self, ®ion, res); + *start_phys = hose->io_base_phys + region.start; + *start_virt = (unsigned long) hose->io_base_virt + + region.start; + if (region.end > region.start) + *size = region.end - region.start + 1; + else { + printk("%s(): unexpected region 0x%lx->0x%lx\n", + __FUNCTION__, region.start, region.end); + return 1; + } + + } else { + /* Root Bus */ + res = &hose->io_resource; + *start_phys = hose->io_base_phys; + *start_virt = (unsigned long) hose->io_base_virt; + if (res->end > res->start) + *size = res->end - res->start + 1; + else { + printk("%s(): unexpected region 0x%lx->0x%lx\n", + __FUNCTION__, res->start, res->end); + return 1; + } + } + + return 0; +} + +int unmap_bus_range(struct pci_bus *bus) +{ + unsigned long start_phys; + unsigned long start_virt; + unsigned long size; + + if (!bus) { + printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); + return 1; + } + + if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) + return 1; + if (iounmap_explicit((void __iomem *) start_virt, size)) + return 1; + + return 0; +} +EXPORT_SYMBOL(unmap_bus_range); + +int remap_bus_range(struct pci_bus *bus) +{ + unsigned long start_phys; + unsigned long start_virt; + unsigned long size; + + if (!bus) { + printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); + return 1; + } + + + if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) + return 1; + if (start_phys == 0) + return 1; + printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); + if (__ioremap_explicit(start_phys, start_virt, size, + _PAGE_NO_CACHE | _PAGE_GUARDED)) + return 1; + + return 0; +} +EXPORT_SYMBOL(remap_bus_range); + +void phbs_remap_io(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + remap_bus_range(hose->bus); +} + +static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long start, end, mask, offset; + + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + start = res->start += offset; + end = res->end += offset; + + /* Need to allow IO access to pages that are in the + ISA range */ + if (start < MAX_ISA_PORT) { + if (end > MAX_ISA_PORT) + end = MAX_ISA_PORT; + + start >>= PAGE_SHIFT; + end >>= PAGE_SHIFT; + + /* get the range of pages for the map */ + mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); + io_page_mask |= mask; + } + } else if (res->flags & IORESOURCE_MEM) { + res->start += hose->pci_mem_offset; + res->end += hose->pci_mem_offset; + } +} + +void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) + if (dev->resource[i].flags) + fixup_resource(&dev->resource[i], dev); +} +EXPORT_SYMBOL(pcibios_fixup_device_resources); + +static void __devinit do_bus_setup(struct pci_bus *bus) +{ + struct pci_dev *dev; + + ppc_md.iommu_bus_setup(bus); + + list_for_each_entry(dev, &bus->devices, bus_list) + ppc_md.iommu_dev_setup(dev); + + if (ppc_md.irq_bus_setup) + ppc_md.irq_bus_setup(bus); +} + +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + + if (dev && pci_probe_only && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + /* This is a subordinate bridge */ + + pci_read_bridge_bases(bus); + pcibios_fixup_device_resources(dev, bus); + } + + do_bus_setup(bus); + + if (!pci_probe_only) + return; + + list_for_each_entry(dev, &bus->devices, bus_list) + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); +} +EXPORT_SYMBOL(pcibios_fixup_bus); + +/* + * Reads the interrupt pin to determine if interrupt is use by card. + * If the interrupt is used, then gets the interrupt line from the + * openfirmware and sets it in the pci_dev and pci_config line. + */ +int pci_read_irq_line(struct pci_dev *pci_dev) +{ + u8 intpin; + struct device_node *node; + + pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin); + if (intpin == 0) + return 0; + + node = pci_device_to_OF_node(pci_dev); + if (node == NULL) + return -1; + + if (node->n_intrs == 0) + return -1; + + pci_dev->irq = node->intrs[0].line; + + pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); + + return 0; +} +EXPORT_SYMBOL(pci_read_irq_line); + +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + u64 *start, u64 *end) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned long offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = pci_io_base - (unsigned long)hose->io_base_virt + + hose->io_base_phys; + + *start = rsrc->start + offset; + *end = rsrc->end + offset; +} + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + +#define IOBASE_BRIDGE_NUMBER 0 +#define IOBASE_MEMORY 1 +#define IOBASE_IO 2 +#define IOBASE_ISA_IO 3 +#define IOBASE_ISA_MEM 4 + +long sys_pciconfig_iobase(long which, unsigned long in_bus, + unsigned long in_devfn) +{ + struct pci_controller* hose; + struct list_head *ln; + struct pci_bus *bus = NULL; + struct device_node *hose_node; + + /* Argh ! Please forgive me for that hack, but that's the + * simplest way to get existing XFree to not lockup on some + * G5 machines... So when something asks for bus 0 io base + * (bus 0 is HT root), we return the AGP one instead. + */ + if (machine_is_compatible("MacRISC4")) + if (in_bus == 0) + in_bus = 0xf0; + + /* That syscall isn't quite compatible with PCI domains, but it's + * used on pre-domains setup. We return the first match + */ + + for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { + bus = pci_bus_b(ln); + if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate)) + break; + bus = NULL; + } + if (bus == NULL || bus->sysdata == NULL) + return -ENODEV; + + hose_node = (struct device_node *)bus->sysdata; + hose = PCI_DN(hose_node)->phb; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return -EINVAL; + } + + return -EOPNOTSUPP; +} diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c new file mode 100644 index 000000000000..e1a32f802c0b --- /dev/null +++ b/arch/powerpc/kernel/pci_direct_iommu.c @@ -0,0 +1,94 @@ +/* + * Support for DMA from PCI devices to main memory on + * machines without an iommu or with directly addressable + * RAM (typically a pmac with 2Gb of RAM or less) + * + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/abs_addr.h> +#include <asm/ppc-pci.h> + +static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + void *ret; + + ret = (void *)__get_free_pages(flag, get_order(size)); + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_abs(ret); + } + return ret; +} + +static void pci_direct_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + +static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr, + size_t size, enum dma_data_direction direction) +{ + return virt_to_abs(ptr); +} + +static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction) +{ +} + +static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = page_to_phys(sg->page) + sg->offset; + sg->dma_length = sg->length; + } + + return nents; +} + +static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ +} + +static int pci_direct_dma_supported(struct device *dev, u64 mask) +{ + return mask < 0x100000000ull; +} + +void __init pci_direct_iommu_init(void) +{ + pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent; + pci_dma_ops.free_coherent = pci_direct_free_coherent; + pci_dma_ops.map_single = pci_direct_map_single; + pci_dma_ops.unmap_single = pci_direct_unmap_single; + pci_dma_ops.map_sg = pci_direct_map_sg; + pci_dma_ops.unmap_sg = pci_direct_unmap_sg; + pci_dma_ops.dma_supported = pci_direct_dma_supported; +} diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c new file mode 100644 index 000000000000..12c4c9e9bbc7 --- /dev/null +++ b/arch/powerpc/kernel/pci_dn.c @@ -0,0 +1,230 @@ +/* + * pci_dn.c + * + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * PCI manipulation via device_nodes. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/bootmem.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/pSeries_reconfig.h> +#include <asm/ppc-pci.h> + +/* + * Traverse_func that inits the PCI fields of the device node. + * NOTE: this *must* be done before read/write config to the device. + */ +static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) +{ + struct pci_controller *phb = data; + int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); + u32 *regs; + struct pci_dn *pdn; + + if (mem_init_done) + pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); + else + pdn = alloc_bootmem(sizeof(*pdn)); + if (pdn == NULL) + return NULL; + memset(pdn, 0, sizeof(*pdn)); + dn->data = pdn; + pdn->node = dn; + pdn->phb = phb; + regs = (u32 *)get_property(dn, "reg", NULL); + if (regs) { + /* First register entry is addr (00BBSS00) */ + pdn->busno = (regs[0] >> 16) & 0xff; + pdn->devfn = (regs[0] >> 8) & 0xff; + } + + pdn->pci_ext_config_space = (type && *type == 1); + return NULL; +} + +/* + * Traverse a device tree stopping each PCI device in the tree. + * This is done depth first. As each node is processed, a "pre" + * function is called and the children are processed recursively. + * + * The "pre" func returns a value. If non-zero is returned from + * the "pre" func, the traversal stops and this value is returned. + * This return value is useful when using traverse as a method of + * finding a device. + * + * NOTE: we do not run the func for devices that do not appear to + * be PCI except for the start node which we assume (this is good + * because the start node is often a phb which may be missing PCI + * properties). + * We use the class-code as an indicator. If we run into + * one of these nodes we also assume its siblings are non-pci for + * performance. + */ +void *traverse_pci_devices(struct device_node *start, traverse_func pre, + void *data) +{ + struct device_node *dn, *nextdn; + void *ret; + + /* We started with a phb, iterate all childs */ + for (dn = start->child; dn; dn = nextdn) { + u32 *classp, class; + + nextdn = NULL; + classp = (u32 *)get_property(dn, "class-code", NULL); + class = classp ? *classp : 0; + + if (pre && ((ret = pre(dn, data)) != NULL)) + return ret; + + /* If we are a PCI bridge, go down */ + if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI || + (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)) + /* Depth first...do children */ + nextdn = dn->child; + else if (dn->sibling) + /* ok, try next sibling instead. */ + nextdn = dn->sibling; + if (!nextdn) { + /* Walk up to next valid sibling. */ + do { + dn = dn->parent; + if (dn == start) + return NULL; + } while (dn->sibling == NULL); + nextdn = dn->sibling; + } + } + return NULL; +} + +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. + */ +void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) +{ + struct device_node * dn = (struct device_node *) phb->arch_data; + struct pci_dn *pdn; + + /* PHB nodes themselves must not match */ + update_dn_pci_info(dn, phb); + pdn = dn->data; + if (pdn) { + pdn->devfn = pdn->busno = -1; + pdn->phb = phb; + } + + /* Update dn->phb ptrs for new phb and children devices */ + traverse_pci_devices(dn, update_dn_pci_info, phb); +} + +/* + * Traversal func that looks for a <busno,devfcn> value. + * If found, the pci_dn is returned (thus terminating the traversal). + */ +static void *is_devfn_node(struct device_node *dn, void *data) +{ + int busno = ((unsigned long)data >> 8) & 0xff; + int devfn = ((unsigned long)data) & 0xff; + struct pci_dn *pci = dn->data; + + if (pci && (devfn == pci->devfn) && (busno == pci->busno)) + return dn; + return NULL; +} + +/* + * This is the "slow" path for looking up a device_node from a + * pci_dev. It will hunt for the device under its parent's + * phb and then update sysdata for a future fastpath. + * + * It may also do fixups on the actual device since this happens + * on the first read/write. + * + * Note that it also must deal with devices that don't exist. + * In this case it may probe for real hardware ("just in case") + * and add a device_node to the device tree if necessary. + * + */ +struct device_node *fetch_dev_dn(struct pci_dev *dev) +{ + struct device_node *orig_dn = dev->sysdata; + struct device_node *dn; + unsigned long searchval = (dev->bus->number << 8) | dev->devfn; + + dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval); + if (dn) + dev->sysdata = dn; + return dn; +} +EXPORT_SYMBOL(fetch_dev_dn); + +static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + struct device_node *np = node; + struct pci_dn *pci = NULL; + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + pci = np->parent->data; + if (pci) + update_dn_pci_info(np, pci->phb); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pci_dn_reconfig_nb = { + .notifier_call = pci_dn_reconfig_notifier, +}; + +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. + */ +void __init pci_devs_phb_init(void) +{ + struct pci_controller *phb, *tmp; + + /* This must be done first so the device nodes have valid pci info! */ + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + pci_devs_phb_init_dynamic(phb); + + pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); +} diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c new file mode 100644 index 000000000000..bdf15dbbf4f0 --- /dev/null +++ b/arch/powerpc/kernel/pci_iommu.c @@ -0,0 +1,128 @@ +/* + * arch/ppc64/kernel/pci_iommu.c + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup, new allocation schemes: + * Copyright (C) 2004 Olof Johansson, IBM Corporation + * + * Dynamic DMA mapping support, platform-independent parts. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/iommu.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> + +/* + * We can use ->sysdata directly and avoid the extra work in + * pci_device_to_OF_node since ->sysdata will have been initialised + * in the iommu init code for all devices. + */ +#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata)) + +static inline struct iommu_table *devnode_table(struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev) { + pdev = ppc64_isabridge_dev; + if (!pdev) + return NULL; + } else + pdev = to_pci_dev(dev); + + return PCI_DN(PCI_GET_DN(pdev))->iommu_table; +} + + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle, + flag); +} + +static void pci_iommu_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address of the buffer + * passed here is the kernel (virtual) address of the buffer. The buffer + * need not be page aligned, the dma_addr_t returned will point to the same + * byte within the page as vaddr. + */ +static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ + return iommu_map_single(devnode_table(hwdev), vaddr, size, direction); +} + + +static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction); +} + + +static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + return iommu_map_sg(pdev, devnode_table(pdev), sglist, + nelems, direction); +} + +static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction); +} + +/* We support DMA to/from any memory page via the iommu */ +static int pci_iommu_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +void pci_iommu_init(void) +{ + pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent; + pci_dma_ops.free_coherent = pci_iommu_free_coherent; + pci_dma_ops.map_single = pci_iommu_map_single; + pci_dma_ops.unmap_single = pci_iommu_unmap_single; + pci_dma_ops.map_sg = pci_iommu_map_sg; + pci_dma_ops.unmap_sg = pci_iommu_unmap_sg; + pci_dma_ops.dma_supported = pci_iommu_dma_supported; +} diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 47d6f7e2ea9f..94db25708456 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -44,6 +44,7 @@ #include <asm/cputable.h> #include <asm/btext.h> #include <asm/div64.h> +#include <asm/signal.h> #ifdef CONFIG_8xx #include <asm/commproc.h> @@ -56,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs); extern void alignment_exception(struct pt_regs *regs); extern void program_check_exception(struct pt_regs *regs); extern void single_step_exception(struct pt_regs *regs); -extern int do_signal(sigset_t *, struct pt_regs *); extern int pmac_newworld; extern int sys_sigreturn(struct pt_regs *regs); @@ -105,6 +105,13 @@ EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); +#ifndef __powerpc64__ +EXPORT_SYMBOL(__ide_mm_insl); +EXPORT_SYMBOL(__ide_mm_outsw); +EXPORT_SYMBOL(__ide_mm_insw); +EXPORT_SYMBOL(__ide_mm_outsl); +#endif + EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); EXPORT_SYMBOL(_insw); @@ -139,9 +146,6 @@ EXPORT_SYMBOL(pci_bus_io_base); EXPORT_SYMBOL(pci_bus_io_base_phys); EXPORT_SYMBOL(pci_bus_mem_base_phys); EXPORT_SYMBOL(pci_bus_to_hose); -EXPORT_SYMBOL(pci_resource_to_bus); -EXPORT_SYMBOL(pci_phys_to_bus); -EXPORT_SYMBOL(pci_bus_to_phys); #endif /* CONFIG_PCI */ #ifdef CONFIG_NOT_COHERENT_CACHE @@ -159,15 +163,13 @@ EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(giveup_spe); #endif /* CONFIG_SPE */ -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(__flush_icache_range); -#else +#ifndef CONFIG_PPC64 EXPORT_SYMBOL(flush_instruction_cache); -EXPORT_SYMBOL(flush_icache_range); EXPORT_SYMBOL(flush_tlb_kernel_range); EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(_tlbie); #endif +EXPORT_SYMBOL(__flush_icache_range); EXPORT_SYMBOL(flush_dcache_range); #ifdef CONFIG_SMP @@ -188,9 +190,6 @@ EXPORT_SYMBOL(adb_try_handler_change); EXPORT_SYMBOL(cuda_request); EXPORT_SYMBOL(cuda_poll); #endif /* CONFIG_ADB_CUDA */ -#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32) -EXPORT_SYMBOL(_machine); -#endif #ifdef CONFIG_PPC_PMAC EXPORT_SYMBOL(sys_ctrler); #endif diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c new file mode 100644 index 000000000000..7ba42a405f41 --- /dev/null +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/kernel.h> + +#include <asm/vdso_datapage.h> +#include <asm/rtas.h> +#include <asm/uaccess.h> +#include <asm/prom.h> + +static loff_t page_map_seek( struct file *file, loff_t off, int whence); +static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos); +static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); + +static struct file_operations page_map_fops = { + .llseek = page_map_seek, + .read = page_map_read, + .mmap = page_map_mmap +}; + +/* + * Create the ppc64 and ppc64/rtas directories early. This allows us to + * assume that they have been previously created in drivers. + */ +static int __init proc_ppc64_create(void) +{ + struct proc_dir_entry *root; + + root = proc_mkdir("ppc64", NULL); + if (!root) + return 1; + + if (!(platform_is_pseries() || _machine == PLATFORM_CELL)) + return 0; + + if (!proc_mkdir("rtas", root)) + return 1; + + if (!proc_symlink("rtas", NULL, "ppc64/rtas")) + return 1; + + return 0; +} +core_initcall(proc_ppc64_create); + +static int __init proc_ppc64_init(void) +{ + struct proc_dir_entry *pde; + + pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL); + if (!pde) + return 1; + pde->nlink = 1; + pde->data = vdso_data; + pde->size = PAGE_SIZE; + pde->proc_fops = &page_map_fops; + + return 0; +} +__initcall(proc_ppc64_init); + +static loff_t page_map_seek( struct file *file, loff_t off, int whence) +{ + loff_t new; + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + + switch(whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + case 2: + new = dp->size + off; + break; + default: + return -EINVAL; + } + if ( new < 0 || new > dp->size ) + return -EINVAL; + return (file->f_pos = new); +} + +static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); +} + +static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + + vma->vm_flags |= VM_SHM | VM_LOCKED; + + if ((vma->vm_end - vma->vm_start) > dp->size) + return -EINVAL; + + remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT, + dp->size, vma->vm_page_prot); + return 0; +} + diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index de69fb37c731..105d5609ff57 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -201,6 +201,28 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) } #endif /* CONFIG_SPE */ +/* + * If we are doing lazy switching of CPU state (FP, altivec or SPE), + * and the current task has some state, discard it. + */ +static inline void discard_lazy_cpu_state(void) +{ +#ifndef CONFIG_SMP + preempt_disable(); + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif + preempt_enable(); +#endif /* CONFIG_SMP */ +} + int set_dabr(unsigned long dabr) { if (ppc_md.set_dabr) @@ -434,19 +456,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { kprobe_flush_task(current); - -#ifndef CONFIG_SMP - if (last_task_used_math == current) - last_task_used_math = NULL; -#ifdef CONFIG_ALTIVEC - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; -#endif -#endif /* CONFIG_SMP */ + discard_lazy_cpu_state(); } void flush_thread(void) @@ -457,20 +467,8 @@ void flush_thread(void) if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); #endif - kprobe_flush_task(current); -#ifndef CONFIG_SMP - if (last_task_used_math == current) - last_task_used_math = NULL; -#ifdef CONFIG_ALTIVEC - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; -#endif -#endif /* CONFIG_SMP */ + discard_lazy_cpu_state(); #ifdef CONFIG_PPC64 /* for now */ if (current->thread.dabr) { @@ -636,18 +634,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) } #endif -#ifndef CONFIG_SMP - if (last_task_used_math == current) - last_task_used_math = NULL; -#ifdef CONFIG_ALTIVEC - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#endif -#ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; -#endif -#endif /* CONFIG_SMP */ + discard_lazy_cpu_state(); memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); current->thread.fpscr.val = 0; #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f645adb57534..3bf968e74095 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -48,9 +48,6 @@ #include <asm/machdep.h> #include <asm/pSeries_reconfig.h> #include <asm/pci-bridge.h> -#ifdef CONFIG_PPC64 -#include <asm/systemcfg.h> -#endif #ifdef DEBUG #define DBG(fmt...) printk(KERN_ERR fmt) @@ -74,10 +71,6 @@ struct isa_reg_property { typedef int interpret_func(struct device_node *, unsigned long *, int, int, int); -extern struct rtas_t rtas; -extern struct lmb lmb; -extern unsigned long klimit; - static int __initdata dt_root_addr_cells; static int __initdata dt_root_size_cells; @@ -391,7 +384,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, #ifdef CONFIG_PPC64 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -1087,9 +1080,9 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; - unsigned long size = 0; + unsigned long size; + char *type = of_get_flat_dt_prop(node, "device_type", &size); /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -1115,7 +1108,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1161,13 +1154,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node, prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; -#ifdef CONFIG_PPC64 - systemcfg->platform = *prop; -#else #ifdef CONFIG_PPC_MULTIPLATFORM _machine = *prop; #endif -#endif #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -1264,7 +1253,14 @@ static int __init early_init_dt_scan_memory(unsigned long node, unsigned long l; /* We are scanning "memory" nodes only */ - if (type == NULL || strcmp(type, "memory") != 0) + if (type == NULL) { + /* + * The longtrail doesn't have a device_type on the + * /memory node, so look for the node called /memory@0. + */ + if (depth != 1 || strcmp(uname, "memory@0") != 0) + return 0; + } else if (strcmp(type, "memory") != 0) return 0; reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); @@ -1339,9 +1335,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); -#ifdef CONFIG_PPC64 - systemcfg->physicalMemorySize = lmb_phys_mem_size(); -#endif lmb_reserve(0, __pa(klimit)); DBG("Phys. mem: %lx\n", lmb_phys_mem_size()); @@ -1375,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np) /* No #address-cells property for the root node, default to 1 */ return 1; } +EXPORT_SYMBOL(prom_n_addr_cells); int prom_n_size_cells(struct device_node* np) @@ -1390,6 +1384,7 @@ prom_n_size_cells(struct device_node* np) /* No #size-cells property for the root node, default to 1 */ return 1; } +EXPORT_SYMBOL(prom_n_size_cells); /** * Work out the sense (active-low level / active-high edge) @@ -1908,7 +1903,7 @@ static int of_finish_dynamic_node(struct device_node *node, /* We don't support that function on PowerMac, at least * not yet */ - if (systemcfg->platform == PLATFORM_POWERMAC) + if (_machine == PLATFORM_POWERMAC) return -ENODEV; /* fix up new node's linux_phandle field */ @@ -1992,9 +1987,11 @@ int prom_add_property(struct device_node* np, struct property* prop) *next = prop; write_unlock(&devtree_lock); +#ifdef CONFIG_PROC_DEVICETREE /* try to add to proc as well if it was initialized */ if (np->pde) proc_device_tree_add_prop(np->pde, prop); +#endif /* CONFIG_PROC_DEVICETREE */ return 0; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6dc33d19fc2a..bcdc209dca85 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -94,11 +94,17 @@ extern const struct linux_logo logo_linux_clut224; #ifdef CONFIG_PPC64 #define RELOC(x) (*PTRRELOC(&(x))) #define ADDR(x) (u32) add_reloc_offset((unsigned long)(x)) +#define OF_WORKAROUNDS 0 #else #define RELOC(x) (x) #define ADDR(x) (u32) (x) +#define OF_WORKAROUNDS of_workarounds +int of_workarounds; #endif +#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ +#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */ + #define PROM_BUG() do { \ prom_printf("kernel BUG at %s line 0x%x!\n", \ RELOC(__FILE__), __LINE__); \ @@ -111,11 +117,6 @@ extern const struct linux_logo logo_linux_clut224; #define prom_debug(x...) #endif -#ifdef CONFIG_PPC32 -#define PLATFORM_POWERMAC _MACH_Pmac -#define PLATFORM_CHRP _MACH_chrp -#endif - typedef u32 prom_arg_t; @@ -128,10 +129,11 @@ struct prom_args { struct prom_t { ihandle root; - ihandle chosen; + phandle chosen; int cpu; ihandle stdout; ihandle mmumap; + ihandle memory; }; struct mem_map_entry { @@ -263,7 +265,7 @@ static int __init call_prom_ret(const char *service, int nargs, int nret, va_end(list); for (i = 0; i < nret; i++) - rets[nargs+i] = 0; + args.args[nargs+i] = 0; if (enter_prom(&args, RELOC(prom_entry)) < 0) return PROM_ERROR; @@ -360,16 +362,36 @@ static void __init prom_printf(const char *format, ...) static unsigned int __init prom_claim(unsigned long virt, unsigned long size, unsigned long align) { - int ret; struct prom_t *_prom = &RELOC(prom); - ret = call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size, - (prom_arg_t)align); - if (ret != -1 && _prom->mmumap != 0) - /* old pmacs need us to map as well */ + if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) { + /* + * Old OF requires we claim physical and virtual separately + * and then map explicitly (assuming virtual mode) + */ + int ret; + prom_arg_t result; + + ret = call_prom_ret("call-method", 5, 2, &result, + ADDR("claim"), _prom->memory, + align, size, virt); + if (ret != 0 || result == -1) + return -1; + ret = call_prom_ret("call-method", 5, 2, &result, + ADDR("claim"), _prom->mmumap, + align, size, virt); + if (ret != 0) { + call_prom("call-method", 4, 1, ADDR("release"), + _prom->memory, size, virt); + return -1; + } + /* the 0x12 is M (coherence) + PP == read/write */ call_prom("call-method", 6, 1, - ADDR("map"), _prom->mmumap, 0, size, virt, virt); - return ret; + ADDR("map"), _prom->mmumap, 0x12, size, virt, virt); + return virt; + } + return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size, + (prom_arg_t)align); } static void __init __attribute__((noreturn)) prom_panic(const char *reason) @@ -415,11 +437,52 @@ static int inline prom_getproplen(phandle node, const char *pname) return call_prom("getproplen", 2, 1, node, ADDR(pname)); } -static int inline prom_setprop(phandle node, const char *pname, - void *value, size_t valuelen) +static void add_string(char **str, const char *q) { - return call_prom("setprop", 4, 1, node, ADDR(pname), - (u32)(unsigned long) value, (u32) valuelen); + char *p = *str; + + while (*q) + *p++ = *q++; + *p++ = ' '; + *str = p; +} + +static char *tohex(unsigned int x) +{ + static char digits[] = "0123456789abcdef"; + static char result[9]; + int i; + + result[8] = 0; + i = 8; + do { + --i; + result[i] = digits[x & 0xf]; + x >>= 4; + } while (x != 0 && i > 0); + return &result[i]; +} + +static int __init prom_setprop(phandle node, const char *nodename, + const char *pname, void *value, size_t valuelen) +{ + char cmd[256], *p; + + if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL)) + return call_prom("setprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); + + /* gah... setprop doesn't work on longtrail, have to use interpret */ + p = cmd; + add_string(&p, "dev"); + add_string(&p, nodename); + add_string(&p, tohex((u32)(unsigned long) value)); + add_string(&p, tohex(valuelen)); + add_string(&p, tohex(ADDR(pname))); + add_string(&p, tohex(strlen(RELOC(pname)))); + add_string(&p, "property"); + *p = 0; + return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); } /* We can't use the standard versions because of RELOC headaches. */ @@ -980,7 +1043,7 @@ static void __init prom_instantiate_rtas(void) rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); if (!IHANDLE_VALID(rtas_inst)) { - prom_printf("opening rtas package failed"); + prom_printf("opening rtas package failed (%x)\n", rtas_inst); return; } @@ -988,7 +1051,7 @@ static void __init prom_instantiate_rtas(void) if (call_prom_ret("call-method", 3, 2, &entry, ADDR("instantiate-rtas"), - rtas_inst, base) == PROM_ERROR + rtas_inst, base) != 0 || entry == 0) { prom_printf(" failed\n"); return; @@ -997,8 +1060,10 @@ static void __init prom_instantiate_rtas(void) reserve_mem(base, size); - prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base)); - prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry)); + prom_setprop(rtas_node, "/rtas", "linux,rtas-base", + &base, sizeof(base)); + prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", + &entry, sizeof(entry)); prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); @@ -1089,10 +1154,6 @@ static void __init prom_initialize_tce_table(void) if (base < local_alloc_bottom) local_alloc_bottom = base; - /* Save away the TCE table attributes for later use. */ - prom_setprop(node, "linux,tce-base", &base, sizeof(base)); - prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize)); - /* It seems OF doesn't null-terminate the path :-( */ memset(path, 0, sizeof(path)); /* Call OF to setup the TCE hardware */ @@ -1101,6 +1162,10 @@ static void __init prom_initialize_tce_table(void) prom_printf("package-to-path failed\n"); } + /* Save away the TCE table attributes for later use. */ + prom_setprop(node, path, "linux,tce-base", &base, sizeof(base)); + prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize)); + prom_debug("TCE table: %s\n", path); prom_debug("\tnode = 0x%x\n", node); prom_debug("\tbase = 0x%x\n", base); @@ -1342,6 +1407,7 @@ static void __init prom_init_client_services(unsigned long pp) /* * For really old powermacs, we need to map things we claim. * For that, we need the ihandle of the mmu. + * Also, on the longtrail, we need to work around other bugs. */ static void __init prom_find_mmu(void) { @@ -1355,12 +1421,19 @@ static void __init prom_find_mmu(void) if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0) return; version[sizeof(version) - 1] = 0; - prom_printf("OF version is '%s'\n", version); /* XXX might need to add other versions here */ - if (strcmp(version, "Open Firmware, 1.0.5") != 0) + if (strcmp(version, "Open Firmware, 1.0.5") == 0) + of_workarounds = OF_WA_CLAIM; + else if (strncmp(version, "FirmWorks,3.", 12) == 0) { + of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; + call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); + } else return; + _prom->memory = call_prom("open", 1, 1, ADDR("/memory")); prom_getprop(_prom->chosen, "mmu", &_prom->mmumap, sizeof(_prom->mmumap)); + if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap)) + of_workarounds &= ~OF_WA_CLAIM; /* hmmm */ } #else #define prom_find_mmu() @@ -1382,16 +1455,17 @@ static void __init prom_init_stdout(void) memset(path, 0, 256); call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); val = call_prom("instance-to-package", 1, 1, _prom->stdout); - prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val)); + prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package", + &val, sizeof(val)); prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); - prom_setprop(_prom->chosen, "linux,stdout-path", - RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1); + prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path", + path, strlen(path) + 1); /* If it's a display, note it */ memset(type, 0, sizeof(type)); prom_getprop(val, "device_type", type, sizeof(type)); if (strcmp(type, RELOC("display")) == 0) - prom_setprop(val, "linux,boot-display", NULL, 0); + prom_setprop(val, path, "linux,boot-display", NULL, 0); } static void __init prom_close_stdin(void) @@ -1514,7 +1588,7 @@ static void __init prom_check_displays(void) /* Success */ prom_printf("done\n"); - prom_setprop(node, "linux,opened", NULL, 0); + prom_setprop(node, path, "linux,opened", NULL, 0); /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ @@ -1884,9 +1958,11 @@ static void __init fixup_device_tree(void) /* interrupt on this revision of u3 is number 0 and level */ interrupts[0] = 0; interrupts[1] = 1; - prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts)); + prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts", + &interrupts, sizeof(interrupts)); parent = (u32)mpic; - prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent)); + prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", + &parent, sizeof(parent)); #endif } @@ -1922,11 +1998,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; val = RELOC(prom_initrd_start); - prom_setprop(_prom->chosen, "linux,initrd-start", &val, - sizeof(val)); + prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start", + &val, sizeof(val)); val = RELOC(prom_initrd_end); - prom_setprop(_prom->chosen, "linux,initrd-end", &val, - sizeof(val)); + prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end", + &val, sizeof(val)); reserve_mem(RELOC(prom_initrd_start), RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); @@ -1969,14 +2045,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_init_client_services(pp); /* - * Init prom stdout device + * See if this OF is old enough that we need to do explicit maps + * and other workarounds */ - prom_init_stdout(); + prom_find_mmu(); /* - * See if this OF is old enough that we need to do explicit maps + * Init prom stdout device */ - prom_find_mmu(); + prom_init_stdout(); /* * Check for an initrd @@ -1989,14 +2066,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ RELOC(of_platform) = prom_find_machine_type(); getprop_rval = RELOC(of_platform); - prom_setprop(_prom->chosen, "linux,platform", + prom_setprop(_prom->chosen, "/chosen", "linux,platform", &getprop_rval, sizeof(getprop_rval)); #ifdef CONFIG_PPC_PSERIES /* * On pSeries, inform the firmware about our capabilities */ - if (RELOC(of_platform) & PLATFORM_PSERIES) + if (RELOC(of_platform) == PLATFORM_PSERIES || + RELOC(of_platform) == PLATFORM_PSERIES_LPAR) prom_send_capabilities(); #endif @@ -2050,21 +2128,23 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * Fill in some infos for use by the kernel later on */ if (RELOC(prom_memory_limit)) - prom_setprop(_prom->chosen, "linux,memory-limit", + prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit", &RELOC(prom_memory_limit), sizeof(prom_memory_limit)); #ifdef CONFIG_PPC64 if (RELOC(ppc64_iommu_off)) - prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0); + prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off", + NULL, 0); if (RELOC(iommu_force_on)) - prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0); + prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on", + NULL, 0); if (RELOC(prom_tce_alloc_start)) { - prom_setprop(_prom->chosen, "linux,tce-alloc-start", + prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start", &RELOC(prom_tce_alloc_start), sizeof(prom_tce_alloc_start)); - prom_setprop(_prom->chosen, "linux,tce-alloc-end", + prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end", &RELOC(prom_tce_alloc_end), sizeof(prom_tce_alloc_end)); } @@ -2081,8 +2161,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_printf("copying OF device tree ...\n"); flatten_device_tree(); - /* in case stdin is USB and still active on IBM machines... */ - prom_close_stdin(); + /* + * in case stdin is USB and still active on IBM machines... + * Unfortunately quiesce crashes on some powermacs if we have + * closed stdin already (in particular the powerbook 101). + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC) + prom_close_stdin(); /* * Call OF "quiesce" method to shut down pending DMA's from diff --git a/arch/powerpc/kernel/ptrace-common.h b/arch/powerpc/kernel/ptrace-common.h new file mode 100644 index 000000000000..b1babb729673 --- /dev/null +++ b/arch/powerpc/kernel/ptrace-common.h @@ -0,0 +1,164 @@ +/* + * linux/arch/ppc64/kernel/ptrace-common.h + * + * Copyright (c) 2002 Stephen Rothwell, IBM Coproration + * Extracted from ptrace.c and ptrace32.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#ifndef _PPC64_PTRACE_COMMON_H +#define _PPC64_PTRACE_COMMON_H + +#include <linux/config.h> +#include <asm/system.h> + +/* + * Set of msr bits that gdb can change on behalf of a process. + */ +#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) + +/* + * Get contents of register REGNO in task TASK. + */ +static inline unsigned long get_reg(struct task_struct *task, int regno) +{ + unsigned long tmp = 0; + + /* + * Put the correct FP bits in, they might be wrong as a result + * of our lazy FP restore. + */ + if (regno == PT_MSR) { + tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; + tmp |= task->thread.fpexc_mode; + } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { + tmp = ((unsigned long *)task->thread.regs)[regno]; + } + + return tmp; +} + +/* + * Write contents of register REGNO in task TASK. + */ +static inline int put_reg(struct task_struct *task, int regno, + unsigned long data) +{ + if (regno < PT_SOFTE) { + if (regno == PT_MSR) + data = (data & MSR_DEBUGCHANGE) + | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static inline void set_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr |= MSR_SE; + set_ti_thread_flag(task->thread_info, TIF_SINGLESTEP); +} + +static inline void clear_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + if (regs != NULL) + regs->msr &= ~MSR_SE; + clear_ti_thread_flag(task->thread_info, TIF_SINGLESTEP); +} + +#ifdef CONFIG_ALTIVEC +/* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +/* + * Get contents of AltiVec register state in task TASK + */ +static inline int get_vrregs(unsigned long __user *data, + struct task_struct *task) +{ + unsigned long regsize; + + /* copy AltiVec registers VR[0] .. VR[31] */ + regsize = 32 * sizeof(vector128); + if (copy_to_user(data, task->thread.vr, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); + + /* copy VSCR */ + regsize = 1 * sizeof(vector128); + if (copy_to_user(data, &task->thread.vscr, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); + + /* copy VRSAVE */ + if (put_user(task->thread.vrsave, (u32 __user *)data)) + return -EFAULT; + + return 0; +} + +/* + * Write contents of AltiVec register state into task TASK. + */ +static inline int set_vrregs(struct task_struct *task, + unsigned long __user *data) +{ + unsigned long regsize; + + /* copy AltiVec registers VR[0] .. VR[31] */ + regsize = 32 * sizeof(vector128); + if (copy_from_user(task->thread.vr, data, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); + + /* copy VSCR */ + regsize = 1 * sizeof(vector128); + if (copy_from_user(&task->thread.vscr, data, regsize)) + return -EFAULT; + data += (regsize / sizeof(unsigned long)); + + /* copy VRSAVE */ + if (get_user(task->thread.vrsave, (u32 __user *)data)) + return -EFAULT; + + return 0; +} +#endif + +static inline int ptrace_set_debugreg(struct task_struct *task, + unsigned long addr, unsigned long data) +{ + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* Ensure translation is on */ + if (data && !(data & DABR_TRANSLATION)) + return -EIO; + + task->thread.dabr = data; + return 0; +} + +#endif /* _PPC64_PTRACE_COMMON_H */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 3d2abd95c7ae..400793c71304 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -36,8 +36,9 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/system.h> + #ifdef CONFIG_PPC64 -#include <asm/ptrace-common.h> +#include "ptrace-common.h" #endif #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 91eb952e0293..61762640b877 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -33,7 +33,8 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/system.h> -#include <asm/ptrace-common.h> + +#include "ptrace-common.h" /* * does not yet catch signals sent when the child dies. diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 5bdd5b079d96..7a95b8a28354 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -32,7 +32,6 @@ #include <asm/rtas.h> #include <asm/machdep.h> /* for ppc_md */ #include <asm/time.h> -#include <asm/systemcfg.h> /* Token for Sensors */ #define KEY_SWITCH 0x0001 @@ -259,7 +258,7 @@ static int __init proc_rtas_init(void) { struct proc_dir_entry *entry; - if (!(systemcfg->platform & PLATFORM_PSERIES)) + if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR) return 1; rtas_node = of_find_node_by_name(NULL, "rtas"); diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c new file mode 100644 index 000000000000..635d3b9a8811 --- /dev/null +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -0,0 +1,105 @@ +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/timer.h> +#include <linux/init.h> +#include <linux/rtc.h> +#include <linux/delay.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/time.h> + + +#define MAX_RTC_WAIT 5000 /* 5 sec */ +#define RTAS_CLOCK_BUSY (-2) +unsigned long __init rtas_get_boot_time(void) +{ + int ret[8]; + int error, wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + wait_time = rtas_extended_busy_delay_time(error); + /* This is boot time so we spin. */ + udelay(wait_time*1000); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return 0; + } + + return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); +} + +/* NOTE: get_rtc_time will get an error if executed in interrupt context + * and if a delay is needed to read the clock. In this case we just + * silently return without updating rtc_tm. + */ +void rtas_get_rtc_time(struct rtc_time *rtc_tm) +{ + int ret[8]; + int error, wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt() && printk_ratelimit()) { + memset(&rtc_tm, 0, sizeof(struct rtc_time)); + printk(KERN_WARNING "error: reading clock" + " would delay interrupt\n"); + return; /* delay not allowed */ + } + wait_time = rtas_extended_busy_delay_time(error); + msleep(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +int rtas_set_rtc_time(struct rtc_time *tm) +{ + int error, wait_time; + u64 max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec, 0); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt()) + return 1; /* probably decrementer */ + wait_time = rtas_extended_busy_delay_time(error); + msleep(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) + printk(KERN_WARNING "error: setting the clock failed (%d)\n", + error); + + return 0; +} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 9d4e07f6f1ec..4283fa33f784 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -29,9 +29,6 @@ #include <asm/delay.h> #include <asm/uaccess.h> #include <asm/lmb.h> -#ifdef CONFIG_PPC64 -#include <asm/systemcfg.h> -#endif struct rtas_t rtas = { .lock = SPIN_LOCK_UNLOCKED @@ -671,7 +668,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + if (_machine == PLATFORM_PSERIES_LPAR) rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); #endif rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c new file mode 100644 index 000000000000..60dec2401c26 --- /dev/null +++ b/arch/powerpc/kernel/rtas_pci.c @@ -0,0 +1,455 @@ +/* + * arch/ppc64/kernel/rtas_pci.c + * + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * + * RTAS specific routines for PCI. + * + * Based on code from pci.c, chrp_pci.c and pSeries_pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/rtas.h> +#include <asm/mpic.h> +#include <asm/ppc-pci.h> + +/* RTAS tokens */ +static int read_pci_config; +static int write_pci_config; +static int ibm_read_pci_config; +static int ibm_write_pci_config; + +static inline int config_access_valid(struct pci_dn *dn, int where) +{ + if (where < 256) + return 1; + if (where < 4096 && dn->pci_ext_config_space) + return 1; + + return 0; +} + +static int of_device_available(struct device_node * dn) +{ + char * status; + + status = get_property(dn, "status", NULL); + + if (!status) + return 1; + + if (!strcmp(status, "okay")) + return 1; + + return 0; +} + +static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +{ + int returnval = -1; + unsigned long buid, addr; + int ret; + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(pdn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | + (pdn->devfn << 8) | (where & 0xff); + buid = pdn->phb->buid; + if (buid) { + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, + addr, BUID_HI(buid), BUID_LO(buid), size); + } else { + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); + } + *val = returnval; + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (returnval == EEH_IO_ERROR_VALUE(size) && + eeh_dn_check_failure (pdn->node, NULL)) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) { + struct pci_dn *pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn + && of_device_available(dn)) + return rtas_read_config(pdn, where, size, val); + } + + return PCIBIOS_DEVICE_NOT_FOUND; +} + +int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) +{ + unsigned long buid, addr; + int ret; + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(pdn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | + (pdn->devfn << 8) | (where & 0xff); + buid = pdn->phb->buid; + if (buid) { + ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), size, (ulong) val); + } else { + ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); + } + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) { + struct pci_dn *pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn + && of_device_available(dn)) + return rtas_write_config(pdn, where, size, val); + } + return PCIBIOS_DEVICE_NOT_FOUND; +} + +struct pci_ops rtas_pci_ops = { + rtas_pci_read_config, + rtas_pci_write_config +}; + +int is_python(struct device_node *dev) +{ + char *model = (char *)get_property(dev, "model", NULL); + + if (model && strstr(model, "Python")) + return 1; + + return 0; +} + +static int get_phb_reg_prop(struct device_node *dev, + unsigned int addr_size_words, + struct reg_property64 *reg) +{ + unsigned int *ui_ptr = NULL, len; + + /* Found a PHB, now figure out where his registers are mapped. */ + ui_ptr = (unsigned int *)get_property(dev, "reg", &len); + if (ui_ptr == NULL) + return 1; + + if (addr_size_words == 1) { + reg->address = ((struct reg_property32 *)ui_ptr)->address; + reg->size = ((struct reg_property32 *)ui_ptr)->size; + } else { + *reg = *((struct reg_property64 *)ui_ptr); + } + + return 0; +} + +static void python_countermeasures(struct device_node *dev, + unsigned int addr_size_words) +{ + struct reg_property64 reg_struct; + void __iomem *chip_regs; + volatile u32 val; + + if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) + return; + + /* Python's register file is 1 MB in size. */ + chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); + + /* + * Firmware doesn't always clear this bit which is critical + * for good performance - Anton + */ + +#define PRG_CL_RESET_VALID 0x00010000 + + val = in_be32(chip_regs + 0xf6030); + if (val & PRG_CL_RESET_VALID) { + printk(KERN_INFO "Python workaround: "); + val &= ~PRG_CL_RESET_VALID; + out_be32(chip_regs + 0xf6030, val); + /* + * We must read it back for changes to + * take effect + */ + val = in_be32(chip_regs + 0xf6030); + printk("reg0: %x\n", val); + } + + iounmap(chip_regs); +} + +void __init init_pci_config_tokens (void) +{ + read_pci_config = rtas_token("read-pci-config"); + write_pci_config = rtas_token("write-pci-config"); + ibm_read_pci_config = rtas_token("ibm,read-pci-config"); + ibm_write_pci_config = rtas_token("ibm,write-pci-config"); +} + +unsigned long __devinit get_phb_buid (struct device_node *phb) +{ + int addr_cells; + unsigned int *buid_vals; + unsigned int len; + unsigned long buid; + + if (ibm_read_pci_config == -1) return 0; + + /* PHB's will always be children of the root node, + * or so it is promised by the current firmware. */ + if (phb->parent == NULL) + return 0; + if (phb->parent->parent) + return 0; + + buid_vals = (unsigned int *) get_property(phb, "reg", &len); + if (buid_vals == NULL) + return 0; + + addr_cells = prom_n_addr_cells(phb); + if (addr_cells == 1) { + buid = (unsigned long) buid_vals[0]; + } else { + buid = (((unsigned long)buid_vals[0]) << 32UL) | + (((unsigned long)buid_vals[1]) & 0xffffffff); + } + return buid; +} + +static int phb_set_bus_ranges(struct device_node *dev, + struct pci_controller *phb) +{ + int *bus_range; + unsigned int len; + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + return 1; + } + + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + return 0; +} + +static int __devinit setup_phb(struct device_node *dev, + struct pci_controller *phb, + unsigned int addr_size_words) +{ + if (is_python(dev)) + python_countermeasures(dev, addr_size_words); + + if (phb_set_bus_ranges(dev, phb)) + return 1; + + phb->ops = &rtas_pci_ops; + phb->buid = get_phb_buid(dev); + + return 0; +} + +unsigned long __init find_and_init_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + unsigned int root_size_cells = 0; + unsigned int index; + unsigned int *opprop = NULL; + struct device_node *root = of_find_node_by_path("/"); + + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + opprop = (unsigned int *)get_property(root, + "platform-open-pic", NULL); + } + + root_size_cells = prom_n_size_cells(root); + + index = 0; + + for (node = of_get_next_child(root, NULL); + node != NULL; + node = of_get_next_child(root, node)) { + if (node->type == NULL || strcmp(node->type, "pci") != 0) + continue; + + phb = pcibios_alloc_controller(node); + if (!phb) + continue; + setup_phb(node, phb, root_size_cells); + pci_process_bridge_OF_ranges(phb, node, 0); + pci_setup_phb_io(phb, index == 0); +#ifdef CONFIG_PPC_PSERIES + if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { + int addr = root_size_cells * (index + 2) - 1; + mpic_assign_isu(pSeries_mpic, index, opprop[addr]); + } +#endif + index++; + } + + of_node_put(root); + pci_devs_phb_init(); + + /* + * pci_probe_only and pci_assign_all_buses can be set via properties + * in chosen. + */ + if (of_chosen) { + int *prop; + + prop = (int *)get_property(of_chosen, "linux,pci-probe-only", + NULL); + if (prop) + pci_probe_only = *prop; + + prop = (int *)get_property(of_chosen, + "linux,pci-assign-all-buses", NULL); + if (prop) + pci_assign_all_buses = *prop; + } + + return 0; +} + +struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) +{ + struct device_node *root = of_find_node_by_path("/"); + unsigned int root_size_cells = 0; + struct pci_controller *phb; + int primary; + + root_size_cells = prom_n_size_cells(root); + + primary = list_empty(&hose_list); + phb = pcibios_alloc_controller(dn); + if (!phb) + return NULL; + setup_phb(dn, phb, root_size_cells); + pci_process_bridge_OF_ranges(phb, dn, primary); + + pci_setup_phb_io_dynamic(phb, primary); + of_node_put(root); + + pci_devs_phb_init_dynamic(phb); + scan_phb(phb); + + return phb; +} +EXPORT_SYMBOL(init_phb_dynamic); + +/* RPA-specific bits for removing PHBs */ +int pcibios_remove_root_bus(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct resource *res; + int rc, i; + + res = b->resource[0]; + if (!res->flags) { + printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, + b->name); + return 1; + } + + rc = unmap_bus_range(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + if (release_resource(res)) { + printk(KERN_ERR "%s: failed to release IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + for (i = 1; i < 3; ++i) { + res = b->resource[i]; + if (!res->flags && i == 0) { + printk(KERN_ERR "%s: no MEM resource for PHB %s\n", + __FUNCTION__, b->name); + return 1; + } + if (res->flags && release_resource(res)) { + printk(KERN_ERR + "%s: failed to release IO %d on bus %s\n", + __FUNCTION__, i, b->name); + return 1; + } + } + + list_del(&phb->list_node); + pcibios_free_controller(phb); + + return 0; +} +EXPORT_SYMBOL(pcibios_remove_root_bus); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index e22856ecb5a0..bd3eb4292b53 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,6 +33,7 @@ #include <asm/io.h> #include <asm/prom.h> #include <asm/processor.h> +#include <asm/vdso_datapage.h> #include <asm/pgtable.h> #include <asm/smp.h> #include <asm/elf.h> @@ -51,15 +52,26 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/lmb.h> +#include <asm/xmon.h> + +#include "setup.h" #undef DEBUG #ifdef DEBUG +#include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) #endif +#ifdef CONFIG_PPC_MULTIPLATFORM +int _machine = 0; +EXPORT_SYMBOL(_machine); +#endif + +unsigned long klimit = (unsigned long) _end; + /* * This still seems to be needed... -- paulus */ @@ -433,10 +445,8 @@ void __init check_for_initrd(void) if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && initrd_end > initrd_start) ROOT_DEV = Root_RAM0; - else { - printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end); + else initrd_start = initrd_end = 0; - } if (initrd_start) printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); @@ -510,8 +520,8 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && - (dn = of_find_node_by_path("/rtas"))) { + if (_machine == PLATFORM_PSERIES_LPAR && + (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; unsigned int *ireg; @@ -555,7 +565,27 @@ void __init smp_setup_cpu_maps(void) cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); } - systemcfg->processorCount = num_present_cpus(); + vdso_data->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ } #endif /* CONFIG_SMP */ + +#ifdef CONFIG_XMON +static int __init early_xmon(char *p) +{ + /* ensure xmon is enabled */ + if (p) { + if (strncmp(p, "on", 2) == 0) + xmon_init(1); + if (strncmp(p, "off", 3) == 0) + xmon_init(0); + if (strncmp(p, "early", 5) != 0) + return 0; + } + xmon_init(1); + debugger(NULL); + + return 0; +} +early_param("xmon", early_xmon); +#endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 000000000000..2ebba755272e --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,6 @@ +#ifndef _POWERPC_KERNEL_SETUP_H +#define _POWERPC_KERNEL_SETUP_H + +void check_for_initrd(void); + +#endif /* _POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 3af2631e3fab..e5694335bf10 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,6 +40,8 @@ #include <asm/xmon.h> #include <asm/time.h> +#include "setup.h" + #define DBG(fmt...) #if defined CONFIG_KGDB @@ -55,10 +57,6 @@ extern void power4_idle(void); boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; -/* XXX should go elsewhere */ -int __irq_offset_value; -EXPORT_SYMBOL(__irq_offset_value); - int boot_cpuid; EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; @@ -70,8 +68,6 @@ unsigned int DMA_MODE_WRITE; int have_of = 1; #ifdef CONFIG_PPC_MULTIPLATFORM -int _machine = 0; - extern void prep_init(void); extern void pmac_init(void); extern void chrp_init(void); @@ -279,7 +275,6 @@ arch_initcall(ppc_init); /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { - extern char *klimit; extern void do_init_bootmem(void); /* so udelay does something sensible, assume <= 1000 bogomips */ @@ -303,14 +298,9 @@ void __init setup_arch(char **cmdline_p) pmac_feature_init(); /* New cool way */ #endif -#ifdef CONFIG_XMON - xmon_map_scc(); - if (strstr(cmd_line, "xmon")) { - xmon_init(1); - debugger(NULL); - } -#endif /* CONFIG_XMON */ - if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab); +#ifdef CONFIG_XMON_DEFAULT + xmon_init(1); +#endif #if defined(CONFIG_KGDB) if (ppc_md.kgdb_map_scc) @@ -343,7 +333,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (unsigned long) klimit; + init_mm.brk = klimit; /* Save unparsed command line copy for /proc/cmdline */ strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0471e843b6c5..e3fb78397dc6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -57,9 +57,11 @@ #include <asm/lmb.h> #include <asm/iseries/it_lp_naca.h> #include <asm/firmware.h> -#include <asm/systemcfg.h> #include <asm/xmon.h> #include <asm/udbg.h> +#include <asm/kexec.h> + +#include "setup.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -94,22 +96,21 @@ extern void udbg_init_maple_realmode(void); do { udbg_putc = call_rtas_display_status_delay; } while(0) #endif -/* extern void *stab; */ -extern unsigned long klimit; - -extern void mm_init_ppc64(void); -extern void stab_initialize(unsigned long stab); -extern void htab_initialize(void); -extern void early_init_devtree(void *flat_dt); -extern void unflatten_device_tree(void); - int have_of = 1; int boot_cpuid = 0; int boot_cpuid_phys = 0; dev_t boot_dev; u64 ppc64_pft_size; -struct ppc64_caches ppc64_caches; +/* Pick defaults since we might want to patch instructions + * before we've read this from the device tree. + */ +struct ppc64_caches ppc64_caches = { + .dline_size = 0x80, + .log_dline_size = 7, + .iline_size = 0x80, + .log_iline_size = 7 +}; EXPORT_SYMBOL_GPL(ppc64_caches); /* @@ -254,11 +255,10 @@ void __init early_setup(unsigned long dt_ptr) * Iterate all ppc_md structures until we find the proper * one for the current machine type */ - DBG("Probing machine type for platform %x...\n", - systemcfg->platform); + DBG("Probing machine type for platform %x...\n", _machine); for (mach = machines; *mach; mach++) { - if ((*mach)->probe(systemcfg->platform)) + if ((*mach)->probe(_machine)) break; } /* What can we do if we didn't find ? */ @@ -290,6 +290,28 @@ void __init early_setup(unsigned long dt_ptr) DBG(" <- early_setup()\n"); } +#ifdef CONFIG_SMP +void early_setup_secondary(void) +{ + struct paca_struct *lpaca = get_paca(); + + /* Mark enabled in PACA */ + lpaca->proc_enabled = 0; + + /* Initialize hash table for that CPU */ + htab_initialize_secondary(); + + /* Initialize STAB/SLB. We use a virtual address as it works + * in real mode on pSeries and we want a virutal address on + * iSeries anyway + */ + if (cpu_has_feature(CPU_FTR_SLB)) + slb_initialize(); + else + stab_initialize(lpaca->stab_addr); +} + +#endif /* CONFIG_SMP */ #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) void smp_release_cpus(void) @@ -315,7 +337,8 @@ void smp_release_cpus(void) #endif /* CONFIG_SMP || CONFIG_KEXEC */ /* - * Initialize some remaining members of the ppc64_caches and systemcfg structures + * Initialize some remaining members of the ppc64_caches and systemcfg + * structures * (at least until we get rid of them completely). This is mostly some * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland @@ -340,7 +363,7 @@ static void __init initialize_cache_info(void) const char *dc, *ic; /* Then read cache informations */ - if (systemcfg->platform == PLATFORM_POWERMAC) { + if (_machine == PLATFORM_POWERMAC) { dc = "d-cache-block-size"; ic = "i-cache-block-size"; } else { @@ -360,9 +383,8 @@ static void __init initialize_cache_info(void) DBG("Argh, can't find dcache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); - systemcfg->dcache_size = ppc64_caches.dsize = size; - systemcfg->dcache_line_size = - ppc64_caches.dline_size = lsize; + ppc64_caches.dsize = size; + ppc64_caches.dline_size = lsize; ppc64_caches.log_dline_size = __ilog2(lsize); ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; @@ -378,20 +400,13 @@ static void __init initialize_cache_info(void) DBG("Argh, can't find icache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); - systemcfg->icache_size = ppc64_caches.isize = size; - systemcfg->icache_line_size = - ppc64_caches.iline_size = lsize; + ppc64_caches.isize = size; + ppc64_caches.iline_size = lsize; ppc64_caches.log_iline_size = __ilog2(lsize); ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; } } - /* Add an eye catcher and the systemcfg layout version number */ - strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); - systemcfg->version.major = SYSTEMCFG_MAJOR; - systemcfg->version.minor = SYSTEMCFG_MINOR; - systemcfg->processor = mfspr(SPRN_PVR); - DBG(" <- initialize_cache_info()\n"); } @@ -409,6 +424,10 @@ void __init setup_system(void) */ unflatten_device_tree(); +#ifdef CONFIG_KEXEC + kexec_setup(); /* requires unflattened device tree. */ +#endif + /* * Fill the ppc64_caches & systemcfg structures with informations * retreived from the device-tree. Need to be called before @@ -478,15 +497,14 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("systemcfg = 0x%p\n", systemcfg); - printk("systemcfg->platform = 0x%x\n", systemcfg->platform); - printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); - printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); + printk("ppc64_interrupt_controller = 0x%ld\n", + ppc64_interrupt_controller); + printk("platform = 0x%x\n", _machine); + printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + ppc64_caches.dline_size); printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + ppc64_caches.iline_size); printk("htab_address = 0x%p\n", htab_address); printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); printk("-----------------------------------------------------\n"); @@ -551,33 +569,6 @@ static void __init emergency_stack_init(void) } /* - * Called from setup_arch to initialize the bitmap of available - * syscalls in the systemcfg page - */ -void __init setup_syscall_map(void) -{ - unsigned int i, count64 = 0, count32 = 0; - extern unsigned long *sys_call_table; - extern unsigned long sys_ni_syscall; - - - for (i = 0; i < __NR_syscalls; i++) { - if (sys_call_table[i*2] != sys_ni_syscall) { - count64++; - systemcfg->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); - } - if (sys_call_table[i*2+1] != sys_ni_syscall) { - count32++; - systemcfg->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); - } - } - printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n", - count32, count64); -} - -/* * Called into from start_kernel, after lock_kernel has been called. * Initializes bootmem, which is unsed to manage page allocation until * mem_init is called. @@ -618,9 +609,6 @@ void __init setup_arch(char **cmdline_p) do_init_bootmem(); sparse_init(); - /* initialize the syscall map in systemcfg */ - setup_syscall_map(); - #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif @@ -858,26 +846,6 @@ int check_legacy_ioport(unsigned long base_port) } EXPORT_SYMBOL(check_legacy_ioport); -#ifdef CONFIG_XMON -static int __init early_xmon(char *p) -{ - /* ensure xmon is enabled */ - if (p) { - if (strncmp(p, "on", 2) == 0) - xmon_init(1); - if (strncmp(p, "off", 3) == 0) - xmon_init(0); - if (strncmp(p, "early", 5) != 0) - return 0; - } - xmon_init(1); - debugger(NULL); - - return 0; -} -early_param("xmon", early_xmon); -#endif - void cpu_die(void) { if (ppc_md.cpu_die) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 081d931eae48..5a2eba60dd39 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -42,10 +42,11 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/sigcontext.h> +#include <asm/vdso.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> -#include <asm/vdso.h> #else #include <asm/ucontext.h> #include <asm/pgtable.h> @@ -402,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, ELF_NFPREG * sizeof(double))) return 1; - current->thread.fpscr.val = 0; /* turn off all fp exceptions */ - #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { @@ -808,18 +807,18 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; -#ifdef CONFIG_PPC64 if (vdso32_rt_sigtramp && current->thread.vdso_base) { if (save_user_regs(regs, frame, 0)) goto badframe; regs->link = current->thread.vdso_base + vdso32_rt_sigtramp; - } else -#endif - { + } else { if (save_user_regs(regs, frame, __NR_rt_sigreturn)) goto badframe; regs->link = (unsigned long) frame->tramp; } + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; regs->gpr[1] = newsp; @@ -1089,19 +1088,18 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, || __put_user(sig, &sc->signal)) goto badframe; -#ifdef CONFIG_PPC64 if (vdso32_sigtramp && current->thread.vdso_base) { if (save_user_regs(regs, &frame->mctx, 0)) goto badframe; regs->link = current->thread.vdso_base + vdso32_sigtramp; - } else -#endif - { + } else { if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) goto badframe; regs->link = (unsigned long) frame->mctx.tramp; } + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; regs->gpr[1] = newsp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 58194e150711..1decf2785530 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -131,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, flush_fp_to_thread(current); - /* Make sure signal doesn't get spurrious FP exceptions */ - current->thread.fpscr.val = 0; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); @@ -423,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, if (err) goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ + current->thread.fpscr.val = 0; + /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->thread.vdso_base) { regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 36d67a8d7cbb..30374d2f88e5 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -44,19 +44,21 @@ #include <asm/cputable.h> #include <asm/system.h> #include <asm/mpic.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> #endif -int smp_hw_index[NR_CPUS]; -struct thread_info *secondary_ti; - #ifdef DEBUG +#include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) #endif +int smp_hw_index[NR_CPUS]; +struct thread_info *secondary_ti; + cpumask_t cpu_possible_map = CPU_MASK_NONE; cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; @@ -368,9 +370,11 @@ int generic_cpu_disable(void) if (cpu == boot_cpuid) return -EBUSY; - systemcfg->processorCount--; cpu_clear(cpu, cpu_online_map); +#ifdef CONFIG_PPC64 + vdso_data->processorCount--; fixup_irqs(cpu_online_map); +#endif return 0; } @@ -388,9 +392,11 @@ int generic_cpu_enable(unsigned int cpu) while (!cpu_online(cpu)) cpu_relax(); +#ifdef CONFIG_PPC64 fixup_irqs(cpu_online_map); /* counter the irq disable in fixup_irqs */ local_irq_enable(); +#endif return 0; } @@ -419,7 +425,9 @@ void generic_mach_cpu_die(void) while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); +#ifdef CONFIG_PPC64 flush_tlb_pending(); +#endif cpu_set(cpu, cpu_online_map); local_irq_enable(); } diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index a8210ed5c686..9c921d1c4084 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -52,7 +52,6 @@ #include <asm/semaphore.h> #include <asm/time.h> #include <asm/mmu_context.h> -#include <asm/systemcfg.h> #include <asm/ppc-pci.h> /* readdir & getdents */ diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index f72ced11212d..91b93d917b64 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -247,7 +247,7 @@ long ppc64_personality(unsigned long personality) #define OVERRIDE_MACHINE 0 #endif -static inline int override_machine(char *mach) +static inline int override_machine(char __user *mach) { if (OVERRIDE_MACHINE) { /* change ppc64 to ppc */ diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c new file mode 100644 index 000000000000..0f0c3a9ae2e5 --- /dev/null +++ b/arch/powerpc/kernel/sysfs.c @@ -0,0 +1,383 @@ +#include <linux/config.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/nodemask.h> +#include <linux/cpumask.h> +#include <linux/notifier.h> + +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/hvcall.h> +#include <asm/prom.h> +#include <asm/paca.h> +#include <asm/lppaca.h> +#include <asm/machdep.h> +#include <asm/smp.h> + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +/* SMT stuff */ + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* default to snooze disabled */ +DEFINE_PER_CPU(unsigned long, smt_snooze_delay); + +static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, + size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + ssize_t ret; + unsigned long snooze; + + ret = sscanf(buf, "%lu", &snooze); + if (ret != 1) + return -EINVAL; + + per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + + return count; +} + +static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + + return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); +} + +static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, + store_smt_snooze_delay); + +/* Only parse OF options if the matching cmdline option was not specified */ +static int smt_snooze_cmdline; + +static int __init smt_setup(void) +{ + struct device_node *options; + unsigned int *val; + unsigned int cpu; + + if (!cpu_has_feature(CPU_FTR_SMT)) + return 1; + + options = find_path_device("/options"); + if (!options) + return 1; + + val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", + NULL); + if (!smt_snooze_cmdline && val) { + for_each_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = *val; + } + + return 1; +} +__initcall(smt_setup); + +static int __init setup_smt_snooze_delay(char *str) +{ + unsigned int cpu; + int snooze; + + if (!cpu_has_feature(CPU_FTR_SMT)) + return 1; + + smt_snooze_cmdline = 1; + + if (get_option(&str, &snooze)) { + for_each_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; + } + + return 1; +} +__setup("smt-snooze-delay=", setup_smt_snooze_delay); + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Enabling PMCs will slow partition context switch times so we only do + * it the first time we write to the PMCs. + */ + +static DEFINE_PER_CPU(char, pmcs_enabled); + +void ppc64_enable_pmcs(void) +{ + /* Only need to enable them once */ + if (__get_cpu_var(pmcs_enabled)) + return; + + __get_cpu_var(pmcs_enabled) = 1; + + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); +} +EXPORT_SYMBOL(ppc64_enable_pmcs); + +/* XXX convert to rusty's on_one_cpu */ +static unsigned long run_on_cpu(unsigned long cpu, + unsigned long (*func)(unsigned long), + unsigned long arg) +{ + cpumask_t old_affinity = current->cpus_allowed; + unsigned long ret; + + /* should return -EINVAL to userspace */ + if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) + return 0; + + ret = func(arg); + + set_cpus_allowed(current, old_affinity); + + return ret; +} + +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +static unsigned long read_##NAME(unsigned long junk) \ +{ \ + return mfspr(ADDRESS); \ +} \ +static unsigned long write_##NAME(unsigned long val) \ +{ \ + ppc64_enable_pmcs(); \ + mtspr(ADDRESS, val); \ + return 0; \ +} \ +static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ + return sprintf(buf, "%lx\n", val); \ +} \ +static ssize_t __attribute_used__ \ + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + unsigned long val; \ + int ret = sscanf(buf, "%lx", &val); \ + if (ret != 1) \ + return -EINVAL; \ + run_on_cpu(cpu->sysdev.id, write_##NAME, val); \ + return count; \ +} + +SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); +SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); +SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(pmc1, SPRN_PMC1); +SYSFS_PMCSETUP(pmc2, SPRN_PMC2); +SYSFS_PMCSETUP(pmc3, SPRN_PMC3); +SYSFS_PMCSETUP(pmc4, SPRN_PMC4); +SYSFS_PMCSETUP(pmc5, SPRN_PMC5); +SYSFS_PMCSETUP(pmc6, SPRN_PMC6); +SYSFS_PMCSETUP(pmc7, SPRN_PMC7); +SYSFS_PMCSETUP(pmc8, SPRN_PMC8); +SYSFS_PMCSETUP(purr, SPRN_PURR); + +static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0); +static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1); +static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1); +static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2); +static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3); +static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4); +static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5); +static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6); +static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7); +static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8); +static SYSDEV_ATTR(purr, 0600, show_purr, NULL); + +static void register_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + +#ifndef CONFIG_PPC_ISERIES + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_create_file(s, &attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + + sysdev_create_file(s, &attr_mmcr0); + sysdev_create_file(s, &attr_mmcr1); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + sysdev_create_file(s, &attr_mmcra); + + if (cur_cpu_spec->num_pmcs >= 1) + sysdev_create_file(s, &attr_pmc1); + if (cur_cpu_spec->num_pmcs >= 2) + sysdev_create_file(s, &attr_pmc2); + if (cur_cpu_spec->num_pmcs >= 3) + sysdev_create_file(s, &attr_pmc3); + if (cur_cpu_spec->num_pmcs >= 4) + sysdev_create_file(s, &attr_pmc4); + if (cur_cpu_spec->num_pmcs >= 5) + sysdev_create_file(s, &attr_pmc5); + if (cur_cpu_spec->num_pmcs >= 6) + sysdev_create_file(s, &attr_pmc6); + if (cur_cpu_spec->num_pmcs >= 7) + sysdev_create_file(s, &attr_pmc7); + if (cur_cpu_spec->num_pmcs >= 8) + sysdev_create_file(s, &attr_pmc8); + + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_create_file(s, &attr_purr); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unregister_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + + BUG_ON(c->no_control); + +#ifndef CONFIG_PPC_ISERIES + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_remove_file(s, &attr_smt_snooze_delay); +#endif + + /* PMC stuff */ + + sysdev_remove_file(s, &attr_mmcr0); + sysdev_remove_file(s, &attr_mmcr1); + + if (cpu_has_feature(CPU_FTR_MMCRA)) + sysdev_remove_file(s, &attr_mmcra); + + if (cur_cpu_spec->num_pmcs >= 1) + sysdev_remove_file(s, &attr_pmc1); + if (cur_cpu_spec->num_pmcs >= 2) + sysdev_remove_file(s, &attr_pmc2); + if (cur_cpu_spec->num_pmcs >= 3) + sysdev_remove_file(s, &attr_pmc3); + if (cur_cpu_spec->num_pmcs >= 4) + sysdev_remove_file(s, &attr_pmc4); + if (cur_cpu_spec->num_pmcs >= 5) + sysdev_remove_file(s, &attr_pmc5); + if (cur_cpu_spec->num_pmcs >= 6) + sysdev_remove_file(s, &attr_pmc6); + if (cur_cpu_spec->num_pmcs >= 7) + sysdev_remove_file(s, &attr_pmc7); + if (cur_cpu_spec->num_pmcs >= 8) + sysdev_remove_file(s, &attr_pmc8); + + if (cpu_has_feature(CPU_FTR_SMT)) + sysdev_remove_file(s, &attr_purr); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __devinit sysfs_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + + switch (action) { + case CPU_ONLINE: + register_cpu_online(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + unregister_cpu_online(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata sysfs_cpu_nb = { + .notifier_call = sysfs_cpu_notify, +}; + +/* NUMA stuff */ + +#ifdef CONFIG_NUMA +static struct node node_devices[MAX_NUMNODES]; + +static void register_nodes(void) +{ + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (node_online(i)) { + int p_node = parent_node(i); + struct node *parent = NULL; + + if (p_node != i) + parent = &node_devices[p_node]; + + register_node(&node_devices[i], i, parent); + } + } +} +#else +static void register_nodes(void) +{ + return; +} +#endif + +/* Only valid if CPU is present. */ +static ssize_t show_physical_id(struct sys_device *dev, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); +} +static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); + +static int __init topology_init(void) +{ + int cpu; + struct node *parent = NULL; + + register_nodes(); + + register_cpu_notifier(&sysfs_cpu_nb); + + for_each_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + +#ifdef CONFIG_NUMA + /* The node to which a cpu belongs can't be known + * until the cpu is made present. + */ + parent = NULL; + if (cpu_present(cpu)) + parent = &node_devices[cpu_to_node(cpu)]; +#endif + /* + * For now, we just see if the system supports making + * the RTAS calls for CPU hotplug. But, there may be a + * more comprehensive way to do this for an individual + * CPU. For instance, the boot cpu might never be valid + * for hotplugging. + */ + if (!ppc_md.cpu_die) + c->no_control = 1; + + if (cpu_online(cpu) || (c->no_control == 0)) { + register_cpu(c, cpu, parent); + + sysdev_create_file(&c->sysdev, &attr_physical_id); + } + + if (cpu_online(cpu)) + register_cpu_online(cpu); + } + + return 0; +} +__initcall(topology_init); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a6282b625b44..de8479769bb7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -62,8 +62,8 @@ #include <asm/irq.h> #include <asm/div64.h> #include <asm/smp.h> +#include <asm/vdso_datapage.h> #ifdef CONFIG_PPC64 -#include <asm/systemcfg.h> #include <asm/firmware.h> #endif #ifdef CONFIG_PPC_ISERIES @@ -130,6 +130,34 @@ unsigned long tb_last_stamp; */ DEFINE_PER_CPU(unsigned long, last_jiffy); +void __delay(unsigned long loops) +{ + unsigned long start; + int diff; + + if (__USE_RTC()) { + start = get_rtcl(); + do { + /* the RTCL register wraps at 1000000000 */ + diff = get_rtcl() - start; + if (diff < 0) + diff += 1000000000; + } while (diff < loops); + } else { + start = get_tbl(); + while (get_tbl() - start < loops) + HMT_low(); + HMT_medium(); + } +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + static __inline__ void timer_check_rtc(void) { /* @@ -261,7 +289,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, do_gtod.varp = temp_varp; do_gtod.var_idx = temp_idx; -#ifdef CONFIG_PPC64 /* * tb_update_count is used to allow the userspace gettimeofday code * to assure itself that it sees a consistent view of the tb_to_xs and @@ -271,14 +298,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, * tb_to_xs and stamp_xsec values are consistent. If not, then it * loops back and reads them again until this criteria is met. */ - ++(systemcfg->tb_update_count); + ++(vdso_data->tb_update_count); smp_wmb(); - systemcfg->tb_orig_stamp = new_tb_stamp; - systemcfg->stamp_xsec = new_stamp_xsec; - systemcfg->tb_to_xs = new_tb_to_xs; + vdso_data->tb_orig_stamp = new_tb_stamp; + vdso_data->stamp_xsec = new_stamp_xsec; + vdso_data->tb_to_xs = new_tb_to_xs; + vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; smp_wmb(); - ++(systemcfg->tb_update_count); -#endif + ++(vdso_data->tb_update_count); } /* @@ -357,8 +385,8 @@ static void iSeries_tb_recal(void) do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; tb_to_xs = divres.result_low; do_gtod.varp->tb_to_xs = tb_to_xs; - systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; - systemcfg->tb_to_xs = tb_to_xs; + vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_data->tb_to_xs = tb_to_xs; } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -483,6 +511,8 @@ void __init smp_space_timers(unsigned int max_cpus) unsigned long offset = tb_ticks_per_jiffy / max_cpus; unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid); + /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ + previous_tb -= tb_ticks_per_jiffy; for_each_cpu(i) { if (i != boot_cpuid) { previous_tb += offset; @@ -558,10 +588,8 @@ int do_settimeofday(struct timespec *tv) new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs; update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs); -#ifdef CONFIG_PPC64 - systemcfg->tz_minuteswest = sys_tz.tz_minuteswest; - systemcfg->tz_dsttime = sys_tz.tz_dsttime; -#endif + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; write_sequnlock_irqrestore(&xtime_lock, flags); clock_was_set(); @@ -710,13 +738,12 @@ void __init time_init(void) do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; do_gtod.varp->tb_to_xs = tb_to_xs; do_gtod.tb_to_us = tb_to_us; -#ifdef CONFIG_PPC64 - systemcfg->tb_orig_stamp = tb_last_jiffy; - systemcfg->tb_update_count = 0; - systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; - systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; - systemcfg->tb_to_xs = tb_to_xs; -#endif + + vdso_data->tb_orig_stamp = tb_last_jiffy; + vdso_data->tb_update_count = 0; + vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; + vdso_data->tb_to_xs = tb_to_xs; time_freq = 0; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0578f8387603..1511454c4690 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -49,7 +49,6 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #include <asm/processor.h> -#include <asm/systemcfg.h> #endif #ifdef CONFIG_PPC64 /* XXX */ @@ -129,7 +128,7 @@ int die(const char *str, struct pt_regs *regs, long err) nl = 1; #endif #ifdef CONFIG_PPC64 - switch (systemcfg->platform) { + switch (_machine) { case PLATFORM_PSERIES: printk("PSERIES "); nl = 1; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c new file mode 100644 index 000000000000..0d878e72fc44 --- /dev/null +++ b/arch/powerpc/kernel/udbg.c @@ -0,0 +1,125 @@ +/* + * polling mode stateless debugging stuff, originally for NS16550 Serial Ports + * + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <stdarg.h> +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/console.h> +#include <asm/processor.h> + +void (*udbg_putc)(unsigned char c); +unsigned char (*udbg_getc)(void); +int (*udbg_getc_poll)(void); + +/* udbg library, used by xmon et al */ +void udbg_puts(const char *s) +{ + if (udbg_putc) { + char c; + + if (s && *s != '\0') { + while ((c = *s++) != '\0') + udbg_putc(c); + } + } +#if 0 + else { + printk("%s", s); + } +#endif +} + +int udbg_write(const char *s, int n) +{ + int remain = n; + char c; + + if (!udbg_putc) + return 0; + + if (s && *s != '\0') { + while (((c = *s++) != '\0') && (remain-- > 0)) { + udbg_putc(c); + } + } + + return n - remain; +} + +int udbg_read(char *buf, int buflen) +{ + char c, *p = buf; + int i; + + if (!udbg_getc) + return 0; + + for (i = 0; i < buflen; ++i) { + do { + c = udbg_getc(); + } while (c == 0x11 || c == 0x13); + if (c == 0) + break; + *p++ = c; + } + + return i; +} + +#define UDBG_BUFSIZE 256 +void udbg_printf(const char *fmt, ...) +{ + unsigned char buf[UDBG_BUFSIZE]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, args); + udbg_puts(buf); + va_end(args); +} + +/* + * Early boot console based on udbg + */ +static void udbg_console_write(struct console *con, const char *s, + unsigned int n) +{ + udbg_write(s, n); +} + +static struct console udbg_console = { + .name = "udbg", + .write = udbg_console_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +static int early_console_initialized; + +void __init disable_early_printk(void) +{ + if (!early_console_initialized) + return; + unregister_console(&udbg_console); + early_console_initialized = 0; +} + +/* called by setup_system */ +void register_early_udbg_console(void) +{ + early_console_initialized = 1; + register_console(&udbg_console); +} + +#if 0 /* if you want to use this as a regular output console */ +console_initcall(register_udbg_console); +#endif diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c new file mode 100644 index 000000000000..9313574ab935 --- /dev/null +++ b/arch/powerpc/kernel/udbg_16550.c @@ -0,0 +1,123 @@ +/* + * udbg for for NS16550 compatable serial ports + * + * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <asm/udbg.h> +#include <asm/io.h> + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); + +struct NS16550 { + /* this struct must be packed */ + unsigned char rbr; /* 0 */ + unsigned char ier; /* 1 */ + unsigned char fcr; /* 2 */ + unsigned char lcr; /* 3 */ + unsigned char mcr; /* 4 */ + unsigned char lsr; /* 5 */ + unsigned char msr; /* 6 */ + unsigned char scr; /* 7 */ +}; + +#define thr rbr +#define iir fcr +#define dll rbr +#define dlm ier +#define dlab lcr + +#define LSR_DR 0x01 /* Data ready */ +#define LSR_OE 0x02 /* Overrun */ +#define LSR_PE 0x04 /* Parity error */ +#define LSR_FE 0x08 /* Framing error */ +#define LSR_BI 0x10 /* Break */ +#define LSR_THRE 0x20 /* Xmit holding register empty */ +#define LSR_TEMT 0x40 /* Xmitter empty */ +#define LSR_ERR 0x80 /* Error */ + +static volatile struct NS16550 __iomem *udbg_comport; + +static void udbg_550_putc(unsigned char c) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + out_8(&udbg_comport->thr, c); + if (c == '\n') + udbg_550_putc('\r'); + } +} + +static int udbg_550_getc_poll(void) +{ + if (udbg_comport) { + if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0) + return in_8(&udbg_comport->rbr); + else + return -1; + } + return -1; +} + +static unsigned char udbg_550_getc(void) +{ + if (udbg_comport) { + while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0) + /* wait for char */; + return in_8(&udbg_comport->rbr); + } + return 0; +} + +void udbg_init_uart(void __iomem *comport, unsigned int speed) +{ + u16 dll = speed ? (115200 / speed) : 12; + + if (comport) { + udbg_comport = (struct NS16550 __iomem *)comport; + out_8(&udbg_comport->lcr, 0x00); + out_8(&udbg_comport->ier, 0xff); + out_8(&udbg_comport->ier, 0x00); + out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */ + out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600, + 3 = 38400, 12 = 9600 baud */ + out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero + for fast rates; */ + out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */ + out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */ + out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */ + udbg_putc = udbg_550_putc; + udbg_getc = udbg_550_getc; + udbg_getc_poll = udbg_550_getc_poll; + } +} + +#ifdef CONFIG_PPC_MAPLE +void udbg_maple_real_putc(unsigned char c) +{ + if (udbg_comport) { + while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + real_writeb(c, &udbg_comport->thr); eieio(); + if (c == '\n') + udbg_maple_real_putc('\r'); + } +} + +void udbg_init_maple_realmode(void) +{ + udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8; + + udbg_putc = udbg_maple_real_putc; + udbg_getc = NULL; + udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC_MAPLE */ diff --git a/arch/powerpc/kernel/udbg_scc.c b/arch/powerpc/kernel/udbg_scc.c new file mode 100644 index 000000000000..820c53551507 --- /dev/null +++ b/arch/powerpc/kernel/udbg_scc.c @@ -0,0 +1,135 @@ +/* + * udbg for for zilog scc ports as found on Apple PowerMacs + * + * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <asm/udbg.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pmac_feature.h> + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); + +#define SCC_TXRDY 4 +#define SCC_RXRDY 1 + +static volatile u8 __iomem *sccc; +static volatile u8 __iomem *sccd; + +static void udbg_scc_putc(unsigned char c) +{ + if (sccc) { + while ((in_8(sccc) & SCC_TXRDY) == 0) + ; + out_8(sccd, c); + if (c == '\n') + udbg_scc_putc('\r'); + } +} + +static int udbg_scc_getc_poll(void) +{ + if (sccc) { + if ((in_8(sccc) & SCC_RXRDY) != 0) + return in_8(sccd); + else + return -1; + } + return -1; +} + +static unsigned char udbg_scc_getc(void) +{ + if (sccc) { + while ((in_8(sccc) & SCC_RXRDY) == 0) + ; + return in_8(sccd); + } + return 0; +} + +static unsigned char scc_inittab[] = { + 13, 0, /* set baud rate divisor */ + 12, 0, + 14, 1, /* baud rate gen enable, src=rtxc */ + 11, 0x50, /* clocks = br gen */ + 5, 0xea, /* tx 8 bits, assert DTR & RTS */ + 4, 0x46, /* x16 clock, 1 stop */ + 3, 0xc1, /* rx enable, 8 bits */ +}; + +void udbg_init_scc(struct device_node *np) +{ + u32 *reg; + unsigned long addr; + int i, x; + + if (np == NULL) + np = of_find_node_by_name(NULL, "escc"); + if (np == NULL || np->parent == NULL) + return; + + udbg_printf("found SCC...\n"); + /* Get address within mac-io ASIC */ + reg = (u32 *)get_property(np, "reg", NULL); + if (reg == NULL) + return; + addr = reg[0]; + udbg_printf("local addr: %lx\n", addr); + /* Get address of mac-io PCI itself */ + reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL); + if (reg == NULL) + return; + addr += reg[2]; + udbg_printf("final addr: %lx\n", addr); + + /* Setup for 57600 8N1 */ + addr += 0x20; + sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc += addr & ~PAGE_MASK; + sccd = sccc + 0x10; + + udbg_printf("ioremap result sccc: %p\n", sccc); + mb(); + + for (i = 20000; i != 0; --i) + x = in_8(sccc); + out_8(sccc, 0x09); /* reset A or B side */ + out_8(sccc, 0xc0); + for (i = 0; i < sizeof(scc_inittab); ++i) + out_8(sccc, scc_inittab[i]); + + udbg_putc = udbg_scc_putc; + udbg_getc = udbg_scc_getc; + udbg_getc_poll = udbg_scc_getc_poll; + + udbg_puts("Hello World !\n"); +} + +static void udbg_real_scc_putc(unsigned char c) +{ + while ((real_readb(sccc) & SCC_TXRDY) == 0) + ; + real_writeb(c, sccd); + if (c == '\n') + udbg_real_scc_putc('\r'); +} + +void udbg_init_pmac_realmode(void) +{ + sccc = (volatile u8 __iomem *)0x80013020ul; + sccd = (volatile u8 __iomem *)0x80013030ul; + + udbg_putc = udbg_real_scc_putc; + udbg_getc = NULL; + udbg_getc_poll = NULL; +} diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c new file mode 100644 index 000000000000..f0c47dab0903 --- /dev/null +++ b/arch/powerpc/kernel/vdso.c @@ -0,0 +1,743 @@ +/* + * linux/arch/ppc64/kernel/vdso.c + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/lmb.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +/* Max supported size for symbol names */ +#define MAX_SYMNAME 64 + +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +unsigned int vdso32_pages; +unsigned long vdso32_sigtramp; +unsigned long vdso32_rt_sigtramp; + +#ifdef CONFIG_PPC64 +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +unsigned int vdso64_pages; +unsigned long vdso64_rt_sigtramp; +#endif /* CONFIG_PPC64 */ + +/* + * The vdso data page (aka. systemcfg for old ppc64 fans) is here. + * Once the early boot kernel code no longer needs to muck around + * with it, it will become dynamically allocated + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* Format of the patch table */ +struct vdso_patch_def +{ + unsigned long ftr_mask, ftr_value; + const char *gen_name; + const char *fix_name; +}; + +/* Table of functions to patch based on the CPU type/revision + * + * Currently, we only change sync_dicache to do nothing on processors + * with a coherent icache + */ +static struct vdso_patch_def vdso_patches[] = { + { + CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, + "__kernel_sync_dicache", "__kernel_sync_dicache_p5" + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_gettimeofday", NULL + }, +}; + +/* + * Some infos carried around for each of them during parsing at + * boot time. + */ +struct lib32_elfinfo +{ + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ + unsigned long text; /* offset of .text section in .so */ +}; + +struct lib64_elfinfo +{ + Elf64_Ehdr *hdr; + Elf64_Sym *dynsym; + unsigned long dynsymsize; + char *dynstr; + unsigned long text; +}; + + +#ifdef __DEBUG +static void dump_one_vdso_page(struct page *pg, struct page *upg) +{ + printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), + page_count(pg), + pg->flags); + if (upg/* && pg != upg*/) { + printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) + << PAGE_SHIFT), + page_count(upg), + upg->flags); + } + printk("\n"); +} + +static void dump_vdso_pages(struct vm_area_struct * vma) +{ + int i; + + if (!vma || test_thread_flag(TIF_32BIT)) { + printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); + for (i=0; i<vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } + if (!vma || !test_thread_flag(TIF_32BIT)) { + printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); + for (i=0; i<vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + + i*PAGE_SIZE); + struct page *upg = (vma && vma->vm_mm) ? + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) + : NULL; + dump_one_vdso_page(pg, upg); + } + } +} +#endif /* DEBUG */ + +/* + * Keep a dummy vma_close for now, it will prevent VMA merging. + */ +static void vdso_vma_close(struct vm_area_struct * vma) +{ +} + +/* + * Our nopage() function, maps in the actual vDSO kernel pages, they will + * be mapped read-only by do_no_page(), and eventually COW'ed, either + * right away for an initial write access, or by do_wp_page(). + */ +static struct page * vdso_vma_nopage(struct vm_area_struct * vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *pg; +#ifdef CONFIG_PPC64 + void *vbase = test_thread_flag(TIF_32BIT) ? + vdso32_kbase : vdso64_kbase; +#else + void *vbase = vdso32_kbase; +#endif + + DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", + current->comm, address, offset); + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + /* + * Last page is systemcfg. + */ + if ((vma->vm_end - address) <= PAGE_SIZE) + pg = virt_to_page(vdso_data); + else + pg = virt_to_page(vbase + offset); + + get_page(pg); + DBG(" ->page count: %d\n", page_count(pg)); + + return pg; +} + +static struct vm_operations_struct vdso_vmops = { + .close = vdso_vma_close, + .nopage = vdso_vma_nopage, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long vdso_pages; + unsigned long vdso_base; + +#ifdef CONFIG_PPC64 + if (test_thread_flag(TIF_32BIT)) { + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; + } else { + vdso_pages = vdso64_pages; + vdso_base = VDSO64_MBASE; + } +#else + vdso_pages = vdso32_pages; + vdso_base = VDSO32_MBASE; +#endif + + current->thread.vdso_base = 0; + + /* vDSO has a problem and was disabled, just don't "enable" it for the + * process + */ + if (vdso_pages == 0) + return 0; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma == NULL) + return -ENOMEM; + + memset(vma, 0, sizeof(*vma)); + + /* Add a page to the vdso size for the data page */ + vdso_pages ++; + + /* + * pick a base address for the vDSO in process space. We try to put it + * at vdso_base which is the "natural" base for it, but we might fail + * and end up putting it elsewhere. + */ + vdso_base = get_unmapped_area(NULL, vdso_base, + vdso_pages << PAGE_SHIFT, 0, 0); + if (vdso_base & ~PAGE_MASK) { + kmem_cache_free(vm_area_cachep, vma); + return (int)vdso_base; + } + + current->thread.vdso_base = vdso_base; + + vma->vm_mm = mm; + vma->vm_start = current->thread.vdso_base; + vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT); + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't + * allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on + * those pages but it's then your responsibility to never do that on + * the "data" page of the vDSO or you'll stop getting kernel updates + * and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + */ + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_ops = &vdso_vmops; + + down_write(&mm->mmap_sem); + if (insert_vm_struct(mm, vma)) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + up_write(&mm->mmap_sem); + + return 0; +} + +static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, + const char *symname) +{ + Elf32_Sym *sym = find_symbol32(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO32: function %s not found !\n", + symname); + return 0; + } + return sym->st_value - VDSO32_LBASE; +} + +static int vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf32_Sym *sym32_gen, *sym32_fix; + + sym32_gen = find_symbol32(v32, orig); + if (sym32_gen == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym32_gen->st_name = 0; + return 0; + } + sym32_fix = find_symbol32(v32, fix); + if (sym32_fix == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); + return -1; + } + sym32_gen->st_value = sym32_fix->st_value; + sym32_gen->st_size = sym32_fix->st_size; + sym32_gen->st_info = sym32_fix->st_info; + sym32_gen->st_other = sym32_fix->st_other; + sym32_gen->st_shndx = sym32_fix->st_shndx; + + return 0; +} + + +#ifdef CONFIG_PPC64 + +static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, + unsigned long *size) +{ + Elf64_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + if (size) + *size = 0; + return NULL; +} + +static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, + const char *symname) +{ + unsigned int i; + char name[MAX_SYMNAME], *c; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +/* Note that we assume the section is .text and the symbol is relative to + * the library base + */ +static unsigned long __init find_function64(struct lib64_elfinfo *lib, + const char *symname) +{ + Elf64_Sym *sym = find_symbol64(lib, symname); + + if (sym == NULL) { + printk(KERN_WARNING "vDSO64: function %s not found !\n", + symname); + return 0; + } +#ifdef VDS64_HAS_DESCRIPTORS + return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - + VDSO64_LBASE; +#else + return sym->st_value - VDSO64_LBASE; +#endif +} + +static int vdso_do_func_patch64(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + Elf64_Sym *sym64_gen, *sym64_fix; + + sym64_gen = find_symbol64(v64, orig); + if (sym64_gen == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); + return -1; + } + if (fix == NULL) { + sym64_gen->st_name = 0; + return 0; + } + sym64_fix = find_symbol64(v64, fix); + if (sym64_fix == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); + return -1; + } + sym64_gen->st_value = sym64_fix->st_value; + sym64_gen->st_size = sym64_fix->st_size; + sym64_gen->st_info = sym64_fix->st_info; + sym64_gen->st_other = sym64_fix->st_other; + sym64_gen->st_shndx = sym64_fix->st_shndx; + + return 0; +} + +#endif /* CONFIG_PPC64 */ + + +static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *sect; + + /* + * Locate symbol tables & text section + */ + + v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); + v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); + if (v32->dynsym == NULL || v32->dynstr == NULL) { + printk(KERN_ERR "vDSO32: required symbol section not found\n"); + return -1; + } + sect = find_section32(v32->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO32: the .text section was not found\n"); + return -1; + } + v32->text = sect - vdso32_kbase; + +#ifdef CONFIG_PPC64 + v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); + v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); + if (v64->dynsym == NULL || v64->dynstr == NULL) { + printk(KERN_ERR "vDSO64: required symbol section not found\n"); + return -1; + } + sect = find_section64(v64->hdr, ".text", NULL); + if (sect == NULL) { + printk(KERN_ERR "vDSO64: the .text section was not found\n"); + return -1; + } + v64->text = sect - vdso64_kbase; +#endif /* CONFIG_PPC64 */ + + return 0; +} + +static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + /* + * Find signal trampolines + */ + +#ifdef CONFIG_PPC64 + vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); +#endif + vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); + vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); +} + +static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + Elf32_Sym *sym32; +#ifdef CONFIG_PPC64 + Elf64_Sym *sym64; + + sym64 = find_symbol64(v64, "__kernel_datapage_offset"); + if (sym64 == NULL) { + printk(KERN_ERR "vDSO64: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = + (vdso64_pages << PAGE_SHIFT) - + (sym64->st_value - VDSO64_LBASE); +#endif /* CONFIG_PPC64 */ + + sym32 = find_symbol32(v32, "__kernel_datapage_offset"); + if (sym32 == NULL) { + printk(KERN_ERR "vDSO32: Can't find symbol " + "__kernel_datapage_offset !\n"); + return -1; + } + *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = + (vdso32_pages << PAGE_SHIFT) - + (sym32->st_value - VDSO32_LBASE); + + return 0; +} + +static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { + struct vdso_patch_def *patch = &vdso_patches[i]; + int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) + == patch->ftr_value; + if (!match) + continue; + + DBG("replacing %s with %s...\n", patch->gen_name, + patch->fix_name ? "NONE" : patch->fix_name); + + /* + * Patch the 32 bits and 64 bits symbols. Note that we do not + * patch the "." symbol on 64 bits. + * It would be easy to do, but doesn't seem to be necessary, + * patching the OPD symbol is enough. + */ + vdso_do_func_patch32(v32, v64, patch->gen_name, + patch->fix_name); +#ifdef CONFIG_PPC64 + vdso_do_func_patch64(v32, v64, patch->gen_name, + patch->fix_name); +#endif /* CONFIG_PPC64 */ + } + + return 0; +} + + +static __init int vdso_setup(void) +{ + struct lib32_elfinfo v32; + struct lib64_elfinfo v64; + + v32.hdr = vdso32_kbase; +#ifdef CONFIG_PPC64 + v64.hdr = vdso64_kbase; +#endif + if (vdso_do_find_sections(&v32, &v64)) + return -1; + + if (vdso_fixup_datapage(&v32, &v64)) + return -1; + + if (vdso_fixup_alt_funcs(&v32, &v64)) + return -1; + + vdso_setup_trampolines(&v32, &v64); + + return 0; +} + +/* + * Called from setup_arch to initialize the bitmap of available + * syscalls in the systemcfg page + */ +static void __init vdso_setup_syscall_map(void) +{ + unsigned int i; + extern unsigned long *sys_call_table; + extern unsigned long sys_ni_syscall; + + + for (i = 0; i < __NR_syscalls; i++) { +#ifdef CONFIG_PPC64 + if (sys_call_table[i*2] != sys_ni_syscall) + vdso_data->syscall_map_64[i >> 5] |= + 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i*2+1] != sys_ni_syscall) + vdso_data->syscall_map_32[i >> 5] |= + 0x80000000UL >> (i & 0x1f); +#else /* CONFIG_PPC64 */ + if (sys_call_table[i] != sys_ni_syscall) + vdso_data->syscall_map_32[i >> 5] |= + 0x80000000UL >> (i & 0x1f); +#endif /* CONFIG_PPC64 */ + } +} + + +void __init vdso_init(void) +{ + int i; + +#ifdef CONFIG_PPC64 + /* + * Fill up the "systemcfg" stuff for backward compatiblity + */ + strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); + vdso_data->version.major = SYSTEMCFG_MAJOR; + vdso_data->version.minor = SYSTEMCFG_MINOR; + vdso_data->processor = mfspr(SPRN_PVR); + vdso_data->platform = _machine; + vdso_data->physicalMemorySize = lmb_phys_mem_size(); + vdso_data->dcache_size = ppc64_caches.dsize; + vdso_data->dcache_line_size = ppc64_caches.dline_size; + vdso_data->icache_size = ppc64_caches.isize; + vdso_data->icache_line_size = ppc64_caches.iline_size; + + /* + * Calculate the size of the 64 bits vDSO + */ + vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; + DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); +#endif /* CONFIG_PPC64 */ + + + /* + * Calculate the size of the 32 bits vDSO + */ + vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; + DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); + + + /* + * Setup the syscall map in the vDOS + */ + vdso_setup_syscall_map(); + /* + * Initialize the vDSO images in memory, that is do necessary + * fixups of vDSO symbols, locate trampolines, etc... + */ + if (vdso_setup()) { + printk(KERN_ERR "vDSO setup failure, not enabled !\n"); + vdso32_pages = 0; +#ifdef CONFIG_PPC64 + vdso64_pages = 0; +#endif + return; + } + + /* Make sure pages are in the correct state */ + for (i = 0; i < vdso32_pages; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + + } +#ifdef CONFIG_PPC64 + for (i = 0; i < vdso64_pages; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + } +#endif /* CONFIG_PPC64 */ + + get_page(virt_to_page(vdso_data)); +} + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} + diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile new file mode 100644 index 000000000000..8a3bed5f143a --- /dev/null +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -0,0 +1,40 @@ + +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o + +# Build rules + +ifeq ($(CONFIG_PPC32),y) +CROSS32CC := $(CC) +endif + +targets := $(obj-vdso32) vdso32.so +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + + +EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin +EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 +EXTRA_AFLAGS := -D__VDSO32__ -s + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -Upowerpc + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< + diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S new file mode 100644 index 000000000000..09629aea3e47 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -0,0 +1,69 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + * + * Note: all CPUs supported by this kernel have a 128 bytes cache + * line size so we don't have to peek that info from the datapage + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + li r5,127 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + srwi. r8,r8,7 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 + mr r3,r6 +1: dcbst 0,r3 + addi r3,r3,128 + bdnz 1b + sync + mtctr r8 +1: icbi 0,r6 + addi r6,r6,128 + bdnz 1b + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + crclr cr0*4+so + sync + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) + diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S new file mode 100644 index 000000000000..4709f1d9542c --- /dev/null +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -0,0 +1,86 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/vdso.h> + + .text +V_FUNCTION_BEGIN(__get_datapage) + .cfi_startproc + /* We don't want that exposed or overridable as we want other objects + * to be able to bl directly to here + */ + .protected __get_datapage + .hidden __get_datapage + + mflr r0 + .cfi_register lr,r0 + + bcl 20,31,1f + .global __kernel_datapage_offset; +__kernel_datapage_offset: + .long 0 +1: + mflr r3 + mtlr r0 + lwz r0,0(r3) + add r3,r0,r3 + blr + .cfi_endproc +V_FUNCTION_END(__get_datapage) + +/* + * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; + * + * returns a pointer to the syscall map. the map is agnostic to the + * size of "long", unlike kernel bitops, it stores bits from top to + * bottom so that memory actually contains a linear bitmap + * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of + * 32 bits int at N >> 5. + */ +V_FUNCTION_BEGIN(__kernel_get_syscall_map) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + mr r4,r3 + bl __get_datapage@local + mtlr r12 + addi r3,r3,CFG_SYSCALL_MAP32 + cmpli cr0,r4,0 + beqlr + li r0,__NR_syscalls + stw r0,0(r4) + crclr cr0*4+so + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_syscall_map) + +/* + * void unsigned long long __kernel_get_tbfreq(void); + * + * returns the timebase frequency in HZ + */ +V_FUNCTION_BEGIN(__kernel_get_tbfreq) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local + lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) + lwz r3,CFG_TB_TICKS_PER_SEC(r3) + mtlr r12 + crclr cr0*4+so + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S new file mode 100644 index 000000000000..7eebff03a041 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -0,0 +1,323 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r10,r3 /* r10 saves tv */ + mr r11,r4 /* r11 saves tz */ + bl __get_datapage@local /* get data page */ + mr r9, r3 /* datapage ptr in r9 */ + bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 2f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r5,r4,12,20,31 + rlwimi r5,r3,12,0,19 + stw r5,TVAL32_TV_SEC(r10) + + /* get remaining xsec and convert to usec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication + */ + rlwinm r5,r4,12,0,19 + lis r6,1000000@h + ori r6,r6,1000000@l + mulhwu r5,r5,r6 + stw r5,TVAL32_TV_USEC(r10) + + cmpli cr0,r11,0 /* check if tz is NULL */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r9) + stw r4,TZONE_TZ_MINWEST(r11) + stw r5,TZONE_TZ_DSTTIME(r11) + +1: mtlr r12 + crclr cr0*4+so + li r3,0 + blr + +2: + mtlr r12 + mr r3,r10 + mr r4,r11 + li r0,__NR_gettimeofday + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpli cr0,r3,CLOCK_REALTIME + cmpli cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r10,r3 /* r10 saves id */ + mr r11,r4 /* r11 saves tp */ + bl __get_datapage@local /* get data page */ + mr r9,r3 /* datapage ptr in r9 */ + beq cr1,50f /* if monotonic -> jump there */ + + /* + * CLOCK_REALTIME + */ + + bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 98f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r5,r4,12,20,31 + rlwimi r5,r3,12,0,19 + stw r5,TSPC32_TV_SEC(r11) + + /* get remaining xsec and convert to nsec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication, then we multiply by 1000 + */ + rlwinm r5,r4,12,0,19 + lis r6,1000000@h + ori r6,r6,1000000@l + mulhwu r5,r5,r6 + mulli r5,r5,1000 + stw r5,TSPC32_TV_NSEC(r11) + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * CLOCK_MONOTONIC + */ + +50: bl __do_get_xsec@local /* get xsec from tb & kernel */ + bne- 98f /* out of line -> do syscall */ + + /* seconds are xsec >> 20 */ + rlwinm r6,r4,12,20,31 + rlwimi r6,r3,12,0,19 + + /* get remaining xsec and convert to nsec. we scale + * up remaining xsec by 12 bits and get the top 32 bits + * of the multiplication, then we multiply by 1000 + */ + rlwinm r7,r4,12,0,19 + lis r5,1000000@h + ori r5,r5,1000000@l + mulhwu r7,r7,r5 + mulli r7,r7,1000 + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_xsec. + * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5 + * can be used + */ + + lwz r3,WTOM_CLOCK_SEC(r9) + lwz r4,WTOM_CLOCK_NSEC(r9) + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + or r5,r4,r3 + xor r0,r5,r5 + add r9,r9,r0 +#ifdef CONFIG_PPC64 + lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) +#else + lwz r0,(CFG_TB_UPDATE_COUNT)(r9) +#endif + cmpl cr0,r8,r0 /* check if updated */ + bne- 50b + + /* Calculate and store result. Note that this mimmics the C code, + * which may cause funny results if nsec goes negative... is that + * possible at all ? + */ + add r3,r3,r6 + add r4,r4,r7 + lis r5,NSEC_PER_SEC@h + ori r5,r5,NSEC_PER_SEC@l + cmpl cr0,r4,r5 + cmpli cr1,r4,0 + blt 1f + subf r4,r5,r4 + addi r3,r3,1 +1: bge cr1,1f + addi r3,r3,-1 + add r4,r4,r5 +1: stw r3,TSPC32_TV_SEC(r11) + stw r4,TSPC32_TV_NSEC(r11) + + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * syscall fallback + */ +98: + mtlr r12 + mr r3,r10 + mr r4,r11 +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + stw r3,TSPC32_TV_SEC(r4) + stw r5,TSPC32_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of gettimeofday() & friends, it returns the xsec + * value in r3 & r4 and expects the datapage ptr (non clobbered) + * in r9. clobbers r0,r4,r5,r6,r7,r8. + * When returning, r8 contains the counter value that can be reused + * by the monotonic clock implementation + */ +__do_get_xsec: + .cfi_startproc + /* Check for update count & load values. We use the low + * order 32 bits of the update count + */ +#ifdef CONFIG_PPC64 +1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) +#else +1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9) +#endif + andi. r0,r8,1 /* pending update ? loop */ + bne- 1b + xor r0,r8,r8 /* create dependency */ + add r9,r9,r0 + + /* Load orig stamp (offset to TB) */ + lwz r5,CFG_TB_ORIG_STAMP(r9) + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) + + /* Get a stable TB value */ +2: mftbu r3 + mftbl r4 + mftbu r0 + cmpl cr0,r3,r0 + bne- 2b + + /* Substract tb orig stamp. If the high part is non-zero, we jump to + * the slow path which call the syscall. + * If it's ok, then we have our 32 bits tb_ticks value in r7 + */ + subfc r7,r6,r4 + subfe. r0,r5,r3 + bne- 3f + + /* Load scale factor & do multiplication */ + lwz r5,CFG_TB_TO_XS(r9) /* load values */ + lwz r6,(CFG_TB_TO_XS+4)(r9) + mulhwu r4,r7,r5 + mulhwu r6,r7,r6 + mullw r0,r7,r5 + addc r6,r6,r0 + + /* At this point, we have the scaled xsec value in r4 + XER:CA + * we load & add the stamp since epoch + */ + lwz r5,CFG_STAMP_XSEC(r9) + lwz r6,(CFG_STAMP_XSEC+4)(r9) + adde r4,r4,r6 + addze r3,r5 + + /* We now have our result in r3,r4. We create a fake dependency + * on that result and re-check the counter + */ + or r6,r4,r3 + xor r0,r6,r6 + add r9,r9,r0 +#ifdef CONFIG_PPC64 + lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) +#else + lwz r0,(CFG_TB_UPDATE_COUNT)(r9) +#endif + cmpl cr0,r8,r0 /* check if updated */ + bne- 1b + + /* Warning ! The caller expects CR:EQ to be set to indicate a + * successful calculation (so it won't fallback to the syscall + * method). We have overriden that CR bit in the counter check, + * but fortunately, the loop exit condition _is_ CR:EQ set, so + * we can exit safely here. If you change this code, be careful + * of that side effect. + */ +3: blr + .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S new file mode 100644 index 000000000000..d4b5be4f3d5f --- /dev/null +++ b/arch/powerpc/kernel/vdso32/note.S @@ -0,0 +1,25 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S new file mode 100644 index 000000000000..e04642781917 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/sigtramp.S @@ -0,0 +1,300 @@ +/* + * Signal trampolines for 32 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/unistd.h> +#include <asm/vdso.h> + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. */ + nop +V_FUNCTION_BEGIN(__kernel_sigtramp32) +.Lsig_start = . - 4 + li r0,__NR_sigreturn + sc +.Lsig_end: +V_FUNCTION_END(__kernel_sigtramp32) + +.Lsigrt_start: + nop +V_FUNCTION_BEGIN(__kernel_sigtramp_rt32) + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt32) + + .section .eh_frame,"a",@progbits + +/* Register r1 can be found at offset 4 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. The VMX reg struct is at offset VREGS of + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. */ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 64+28 + +/* Size of regs. */ +#define RSIZE 4 + +/* This is the offset of the VMX regs. */ +#define VREGS 48*RSIZE+34*8 + +/* Describe where general purpose regs are saved. */ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. */ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 32*16) +#else +#define EH_FRAME_VMX +#endif + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 4 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsig_start - . /* PC start, length */ + .long .Lsig_end - .Lsig_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde0_end: + +/* We have a different stack layout for rt_sigreturn. */ +#undef PTREGS +#define PTREGS 64+16+128+20+28 + + .long .Lfde1_end - .Lfde1_start +.Lfde1_start: + .long .Lfde1_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX + .balign 4 +.Lfde1_end: diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..f4bad720cb0a --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,117 @@ + +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +OUTPUT_ARCH(powerpc:common) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .got : { *(.got) } + .plt : { *(.plt) } + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_datapage_offset; /* Has to be there for the kernel to find */ + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp32; + __kernel_sigtramp_rt32; + local: *; + }; +} diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 000000000000..556f0caa5d84 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,13 @@ +#include <linux/init.h> +#include <asm/page.h> + + .section ".data.page_aligned" + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/powerpc/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile new file mode 100644 index 000000000000..ab39988452cc --- /dev/null +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -0,0 +1,35 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin +EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 +EXTRA_AFLAGS := -D__VDSO64__ -s + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + + diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S new file mode 100644 index 000000000000..cb4ae0a5edd0 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -0,0 +1,68 @@ +/* + * vDSO provided cache flush routines + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + +/* + * Default "generic" version of __kernel_sync_dicache. + * + * void __kernel_sync_dicache(unsigned long start, unsigned long end) + * + * Flushes the data cache & invalidate the instruction cache for the + * provided range [start, end[ + * + * Note: all CPUs supported by this kernel have a 128 bytes cache + * line size so we don't have to peek that info from the datapage + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache) + .cfi_startproc + li r5,127 + andc r6,r3,r5 /* round low to line bdy */ + subf r8,r6,r4 /* compute length */ + add r8,r8,r5 /* ensure we get enough */ + srwi. r8,r8,7 /* compute line count */ + crclr cr0*4+so + beqlr /* nothing to do? */ + mtctr r8 + mr r3,r6 +1: dcbst 0,r3 + addi r3,r3,128 + bdnz 1b + sync + mtctr r8 +1: icbi 0,r6 + addi r6,r6,128 + bdnz 1b + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache) + + +/* + * POWER5 version of __kernel_sync_dicache + */ +V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) + .cfi_startproc + crclr cr0*4+so + sync + isync + li r3,0 + blr + .cfi_endproc +V_FUNCTION_END(__kernel_sync_dicache_p5) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S new file mode 100644 index 000000000000..3b2dd7d0c1eb --- /dev/null +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -0,0 +1,86 @@ +/* + * Access to the shared data page by the vDSO & syscall map + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/vdso.h> + + .text +V_FUNCTION_BEGIN(__get_datapage) + .cfi_startproc + /* We don't want that exposed or overridable as we want other objects + * to be able to bl directly to here + */ + .protected __get_datapage + .hidden __get_datapage + + mflr r0 + .cfi_register lr,r0 + + bcl 20,31,1f + .global __kernel_datapage_offset; +__kernel_datapage_offset: + .long 0 +1: + mflr r3 + mtlr r0 + lwz r0,0(r3) + add r3,r0,r3 + blr + .cfi_endproc +V_FUNCTION_END(__get_datapage) + +/* + * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; + * + * returns a pointer to the syscall map. the map is agnostic to the + * size of "long", unlike kernel bitops, it stores bits from top to + * bottom so that memory actually contains a linear bitmap + * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of + * 32 bits int at N >> 5. + */ +V_FUNCTION_BEGIN(__kernel_get_syscall_map) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + mr r4,r3 + bl V_LOCAL_FUNC(__get_datapage) + mtlr r12 + addi r3,r3,CFG_SYSCALL_MAP64 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + li r0,__NR_syscalls + stw r0,0(r4) + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_syscall_map) + + +/* + * void unsigned long __kernel_get_tbfreq(void); + * + * returns the timebase frequency in HZ + */ +V_FUNCTION_BEGIN(__kernel_get_tbfreq) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + ld r3,CFG_TB_TICKS_PER_SEC(r3) + mtlr r12 + crclr cr0*4+so + blr + .cfi_endproc +V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S new file mode 100644 index 000000000000..ccaeda5136d1 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -0,0 +1,253 @@ + + /* + * Userland implementation of gettimeofday() for 64 bits processes in a + * ppc64 kernel for use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text +/* + * Exact prototype of gettimeofday + * + * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); + * + */ +V_FUNCTION_BEGIN(__kernel_gettimeofday) + .cfi_startproc + mflr r12 + .cfi_register lr,r12 + + mr r11,r3 /* r11 holds tv */ + mr r10,r4 /* r10 holds tz */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r0,r0,44,20 + cmpldi cr0,r10,0 /* check if tz is NULL */ + std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ + beq 1f + lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ + lwz r5,CFG_TZ_DSTTIME(r3) + stw r4,TZONE_TZ_MINWEST(r10) + stw r5,TZONE_TZ_DSTTIME(r10) +1: mtlr r12 + crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_gettimeofday) + + +/* + * Exact prototype of clock_gettime() + * + * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + mflr r12 /* r12 saves lr */ + .cfi_register lr,r12 + mr r10,r3 /* r10 saves id */ + mr r11,r4 /* r11 saves tp */ + bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + beq cr1,50f /* if monotonic -> jump there */ + + /* + * CLOCK_REALTIME + */ + + bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + std r5,TSPC64_TV_SEC(r11) /* store sec in tv */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r0,r0,44,20 + mulli r0,r0,1000 /* nsec = usec * 1000 */ + std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */ + + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * CLOCK_MONOTONIC + */ + +50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ + + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 + rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ + rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ + subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / + * XSEC_PER_SEC + */ + rldicl r6,r0,44,20 + mulli r6,r6,1000 /* nsec = usec * 1000 */ + + /* now we must fixup using wall to monotonic. We need to snapshot + * that value and do the counter trick again. Fortunately, we still + * have the counter value in r8 that was returned by __do_get_xsec. + * At this point, r5,r6 contain our sec/nsec values. + * can be used + */ + + lwa r4,WTOM_CLOCK_SEC(r3) + lwa r7,WTOM_CLOCK_NSEC(r3) + + /* We now have our result in r4,r7. We create a fake dependency + * on that result and re-check the counter + */ + or r9,r4,r7 + xor r0,r9,r9 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 50b + + /* Calculate and store result. Note that this mimmics the C code, + * which may cause funny results if nsec goes negative... is that + * possible at all ? + */ + add r4,r4,r5 + add r7,r7,r6 + lis r9,NSEC_PER_SEC@h + ori r9,r9,NSEC_PER_SEC@l + cmpl cr0,r7,r9 + cmpli cr1,r7,0 + blt 1f + subf r7,r9,r7 + addi r4,r4,1 +1: bge cr1,1f + addi r4,r4,-1 + add r7,r7,r9 +1: std r4,TSPC64_TV_SEC(r11) + std r7,TSPC64_TV_NSEC(r11) + + mtlr r12 + crclr cr0*4+so + li r3,0 + blr + + /* + * syscall fallback + */ +98: + mtlr r12 + mr r3,r10 + mr r4,r11 +99: + li r0,__NR_clock_gettime + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_gettime) + + +/* + * Exact prototype of clock_getres() + * + * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_getres) + .cfi_startproc + /* Check for supported clock IDs */ + cmpwi cr0,r3,CLOCK_REALTIME + cmpwi cr1,r3,CLOCK_MONOTONIC + cror cr0*4+eq,cr0*4+eq,cr1*4+eq + bne cr0,99f + + li r3,0 + cmpli cr0,r4,0 + crclr cr0*4+so + beqlr + lis r5,CLOCK_REALTIME_RES@h + ori r5,r5,CLOCK_REALTIME_RES@l + std r3,TSPC64_TV_SEC(r4) + std r5,TSPC64_TV_NSEC(r4) + blr + + /* + * syscall fallback + */ +99: + li r0,__NR_clock_getres + sc + blr + .cfi_endproc +V_FUNCTION_END(__kernel_clock_getres) + + +/* + * This is the core of gettimeofday(), it returns the xsec + * value in r4 and expects the datapage ptr (non clobbered) + * in r3. clobbers r0,r4,r5,r6,r7,r8 + * When returning, r8 contains the counter value that can be reused + */ +V_FUNCTION_BEGIN(__do_get_xsec) + .cfi_startproc + /* check for update count & load values */ +1: ld r8,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r4,1 /* pending update ? loop */ + bne- 1b + xor r0,r4,r4 /* create dependency */ + add r3,r3,r0 + + /* Get TB & offset it */ + mftb r7 + ld r9,CFG_TB_ORIG_STAMP(r3) + subf r7,r9,r7 + + /* Scale result */ + ld r5,CFG_TB_TO_XS(r3) + mulhdu r7,r7,r5 + + /* Add stamp since epoch */ + ld r6,CFG_STAMP_XSEC(r3) + add r4,r6,r7 + + xor r0,r4,r4 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 1b + blr + .cfi_endproc +V_FUNCTION_END(__do_get_xsec) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S new file mode 100644 index 000000000000..dc2a509f7e8a --- /dev/null +++ b/arch/powerpc/kernel/vdso64/note.S @@ -0,0 +1 @@ +#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S new file mode 100644 index 000000000000..31b604ab56de --- /dev/null +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -0,0 +1,295 @@ +/* + * Signal trampoline for 64 bits processes in a ppc64 kernel for + * use in the vDSO + * + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. + * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/unistd.h> +#include <asm/vdso.h> +#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */ + + .text + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by padding before the + real start. */ + nop + .balign 8 +V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) +.Lsigrt_start = . - 4 + addi r1, r1, __SIGNAL_FRAMESIZE + li r0,__NR_rt_sigreturn + sc +.Lsigrt_end: +V_FUNCTION_END(__kernel_sigtramp_rt64) +/* The ".balign 8" above and the following zeros mimic the old stack + trampoline layout. The last magic value is the ucontext pointer, + chosen in such a way that older libgcc unwind code returns a zero + for a sigcontext pointer. */ + .long 0,0,0 + .quad 0,-21*8 + +/* Register r1 can be found at offset 8 of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define cfa_save \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ +9: + +/* Register REGNO can be found at offset OFS of a pt_regs structure. + A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */ +#define rsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .ifne ofs; \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ + .endif; \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in + the pt_regs struct. This macro is for REGNO == 0, and contains + 'subroutines' that the other macros jump to. */ +#define vsave_msr0(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit0 */ \ +2: \ + .byte 0x40; /* DW_OP_lit16 */ \ + .byte 0x1e; /* DW_OP_mul */ \ +3: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x12; /* DW_OP_dup */ \ + .byte 0x23; /* DW_OP_plus_uconst */ \ + .uleb128 33*RSIZE; /* msr offset */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \ + .byte 0x1a; /* DW_OP_and */ \ + .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \ + .byte 0x30; /* DW_OP_lit0 */ \ + .byte 0x29; /* DW_OP_eq */ \ + .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \ + .byte 0x13; /* DW_OP_drop, pop the 0 */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x22; /* DW_OP_plus */ \ + .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16 + of the VMX reg struct. REGNO is 1 thru 31. */ +#define vsave_msr1(regno) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x30 + regno; /* DW_OP_lit n */ \ + .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \ +9: + +/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of + the VMX save block. */ +#define vsave_msr2(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x0a; .short ofs; /* DW_OP_const2u */ \ + .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \ +9: + +/* VMX register REGNO is at offset OFS of the VMX save area. */ +#define vsave(regno, ofs) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno + 77; /* regno */ \ + .uleb128 9f - 1f; /* length */ \ +1: \ + .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \ + .byte 0x06; /* DW_OP_deref */ \ + .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \ +9: + +/* This is where the pt_regs pointer can be found on the stack. */ +#define PTREGS 128+168+56 + +/* Size of regs. */ +#define RSIZE 8 + +/* This is the offset of the VMX reg pointer. */ +#define VREGS 48*RSIZE+33*8 + +/* Describe where general purpose regs are saved. */ +#define EH_FRAME_GEN \ + cfa_save; \ + rsave ( 0, 0*RSIZE); \ + rsave ( 2, 2*RSIZE); \ + rsave ( 3, 3*RSIZE); \ + rsave ( 4, 4*RSIZE); \ + rsave ( 5, 5*RSIZE); \ + rsave ( 6, 6*RSIZE); \ + rsave ( 7, 7*RSIZE); \ + rsave ( 8, 8*RSIZE); \ + rsave ( 9, 9*RSIZE); \ + rsave (10, 10*RSIZE); \ + rsave (11, 11*RSIZE); \ + rsave (12, 12*RSIZE); \ + rsave (13, 13*RSIZE); \ + rsave (14, 14*RSIZE); \ + rsave (15, 15*RSIZE); \ + rsave (16, 16*RSIZE); \ + rsave (17, 17*RSIZE); \ + rsave (18, 18*RSIZE); \ + rsave (19, 19*RSIZE); \ + rsave (20, 20*RSIZE); \ + rsave (21, 21*RSIZE); \ + rsave (22, 22*RSIZE); \ + rsave (23, 23*RSIZE); \ + rsave (24, 24*RSIZE); \ + rsave (25, 25*RSIZE); \ + rsave (26, 26*RSIZE); \ + rsave (27, 27*RSIZE); \ + rsave (28, 28*RSIZE); \ + rsave (29, 29*RSIZE); \ + rsave (30, 30*RSIZE); \ + rsave (31, 31*RSIZE); \ + rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ + rsave (65, 36*RSIZE); /* lr */ \ + rsave (70, 38*RSIZE) /* cr */ + +/* Describe where the FP regs are saved. */ +#define EH_FRAME_FP \ + rsave (32, 48*RSIZE + 0*8); \ + rsave (33, 48*RSIZE + 1*8); \ + rsave (34, 48*RSIZE + 2*8); \ + rsave (35, 48*RSIZE + 3*8); \ + rsave (36, 48*RSIZE + 4*8); \ + rsave (37, 48*RSIZE + 5*8); \ + rsave (38, 48*RSIZE + 6*8); \ + rsave (39, 48*RSIZE + 7*8); \ + rsave (40, 48*RSIZE + 8*8); \ + rsave (41, 48*RSIZE + 9*8); \ + rsave (42, 48*RSIZE + 10*8); \ + rsave (43, 48*RSIZE + 11*8); \ + rsave (44, 48*RSIZE + 12*8); \ + rsave (45, 48*RSIZE + 13*8); \ + rsave (46, 48*RSIZE + 14*8); \ + rsave (47, 48*RSIZE + 15*8); \ + rsave (48, 48*RSIZE + 16*8); \ + rsave (49, 48*RSIZE + 17*8); \ + rsave (50, 48*RSIZE + 18*8); \ + rsave (51, 48*RSIZE + 19*8); \ + rsave (52, 48*RSIZE + 20*8); \ + rsave (53, 48*RSIZE + 21*8); \ + rsave (54, 48*RSIZE + 22*8); \ + rsave (55, 48*RSIZE + 23*8); \ + rsave (56, 48*RSIZE + 24*8); \ + rsave (57, 48*RSIZE + 25*8); \ + rsave (58, 48*RSIZE + 26*8); \ + rsave (59, 48*RSIZE + 27*8); \ + rsave (60, 48*RSIZE + 28*8); \ + rsave (61, 48*RSIZE + 29*8); \ + rsave (62, 48*RSIZE + 30*8); \ + rsave (63, 48*RSIZE + 31*8) + +/* Describe where the VMX regs are saved. */ +#ifdef CONFIG_ALTIVEC +#define EH_FRAME_VMX \ + vsave_msr0 ( 0); \ + vsave_msr1 ( 1); \ + vsave_msr1 ( 2); \ + vsave_msr1 ( 3); \ + vsave_msr1 ( 4); \ + vsave_msr1 ( 5); \ + vsave_msr1 ( 6); \ + vsave_msr1 ( 7); \ + vsave_msr1 ( 8); \ + vsave_msr1 ( 9); \ + vsave_msr1 (10); \ + vsave_msr1 (11); \ + vsave_msr1 (12); \ + vsave_msr1 (13); \ + vsave_msr1 (14); \ + vsave_msr1 (15); \ + vsave_msr1 (16); \ + vsave_msr1 (17); \ + vsave_msr1 (18); \ + vsave_msr1 (19); \ + vsave_msr1 (20); \ + vsave_msr1 (21); \ + vsave_msr1 (22); \ + vsave_msr1 (23); \ + vsave_msr1 (24); \ + vsave_msr1 (25); \ + vsave_msr1 (26); \ + vsave_msr1 (27); \ + vsave_msr1 (28); \ + vsave_msr1 (29); \ + vsave_msr1 (30); \ + vsave_msr1 (31); \ + vsave_msr2 (33, 32*16+12); \ + vsave (32, 33*16) +#else +#define EH_FRAME_VMX +#endif + + .section .eh_frame,"a",@progbits +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 -8 /* Data alignment factor */ + .byte 67 /* Return address register column, ap */ + .uleb128 1 /* Augmentation value length */ + .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */ + .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */ + .balign 8 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .quad .Lsigrt_start - . /* PC start, length */ + .quad .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + EH_FRAME_GEN + EH_FRAME_FP + EH_FRAME_VMX +# Do we really need to describe the frame at this point? ie. will +# we ever have some call chain that returns somewhere past the addi? +# I don't think so, since gcc doesn't support async signals. +# .byte 0x41 /* DW_CFA_advance_loc 1*4 */ +#undef PTREGS +#define PTREGS 168+56 +# EH_FRAME_GEN +# EH_FRAME_FP +# EH_FRAME_VMX + .balign 8 +.Lfde0_end: diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S new file mode 100644 index 000000000000..4bdf224464ab --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,116 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc") +OUTPUT_ARCH(powerpc:common64) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + *(.sfpr .glink) + } :text + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + + .opd ALIGN(8) : { KEEP (*(.opd)) } + .got ALIGN(8) : { *(.got .toc) } + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + + .dynamic : { *(.dynamic) } :text :dynamic + + _end = .; + PROVIDE (end = .); + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sectio/ns. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.branch_lt) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_datapage_offset; /* Has to be there for the kernel to find */ + __kernel_get_syscall_map; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_get_tbfreq; + __kernel_sync_dicache; + __kernel_sync_dicache_p5; + __kernel_sigtramp_rt64; + local: *; + }; +} diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 000000000000..0529cb9e3b97 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,13 @@ +#include <linux/init.h> +#include <asm/page.h> + + .section ".data.page_aligned" + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/powerpc/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/powerpc/lib/bitops.c b/arch/powerpc/lib/bitops.c index b67ce3004ebf..f68ad71a0187 100644 --- a/arch/powerpc/lib/bitops.c +++ b/arch/powerpc/lib/bitops.c @@ -41,7 +41,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, tmp = *p; found_first: - tmp &= (~0UL >> (64 - size)); + tmp &= (~0UL >> (BITS_PER_LONG - size)); if (tmp == 0UL) /* Are any bits set? */ return result + size; /* Nope. */ found_middle: diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c index b7bcbc232f39..4d006aa1a0d1 100644 --- a/arch/powerpc/mm/4xx_mmu.c +++ b/arch/powerpc/mm/4xx_mmu.c @@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_16M; p += LARGE_PAGE_SIZE_16M; @@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_4M; p += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index af9ca0eb6d55..5d581bb3aa12 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -1,5 +1,5 @@ /* - * Modifications by Kumar Gala (kumar.gala@freescale.com) to support + * Modifications by Kumar Gala (galak@kernel.crashing.org) to support * E500 Book E processors. * * Copyright 2004 Freescale Semiconductor, Inc diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 22e474876133..a606504678bd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -84,10 +84,11 @@ extern unsigned long dart_tablebase; #endif /* CONFIG_U3_DART */ +static unsigned long _SDR1; +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + hpte_t *htab_address; unsigned long htab_hash_mask; -unsigned long _SDR1; -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; int mmu_linear_psize = MMU_PAGE_4K; int mmu_virtual_psize = MMU_PAGE_4K; #ifdef CONFIG_HUGETLB_PAGE @@ -165,7 +166,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, * normal insert callback here. */ #ifdef CONFIG_PPC_ISERIES - if (systemcfg->platform == PLATFORM_ISERIES_LPAR) + if (_machine == PLATFORM_ISERIES_LPAR) ret = iSeries_hpte_insert(hpteg, va, virt_to_abs(paddr), tmp_mode, @@ -174,7 +175,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, else #endif #ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_LPAR) + if (_machine & PLATFORM_LPAR) ret = pSeries_lpar_hpte_insert(hpteg, va, virt_to_abs(paddr), tmp_mode, @@ -293,7 +294,7 @@ static void __init htab_init_page_sizes(void) * Not in the device-tree, let's fallback on known size * list for 16M capable GP & GR */ - if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) && + if ((_machine != PLATFORM_ISERIES_LPAR) && cpu_has_feature(CPU_FTR_16M_PAGE)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); @@ -364,7 +365,7 @@ static int __init htab_dt_scan_pftsize(unsigned long node, static unsigned long __init htab_get_table_size(void) { - unsigned long rnd_mem_size, pteg_count; + unsigned long mem_size, rnd_mem_size, pteg_count; /* If hash size isn't already provided by the platform, we try to * retreive it from the device-tree. If it's not there neither, we @@ -376,8 +377,9 @@ static unsigned long __init htab_get_table_size(void) return 1UL << ppc64_pft_size; /* round mem_size up to next power of 2 */ - rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); - if (rnd_mem_size < systemcfg->physicalMemorySize) + mem_size = lmb_phys_mem_size(); + rnd_mem_size = 1UL << __ilog2(mem_size); + if (rnd_mem_size < mem_size) rnd_mem_size <<= 1; /* # pages / 2 */ @@ -386,6 +388,15 @@ static unsigned long __init htab_get_table_size(void) return pteg_count << 7; } +#ifdef CONFIG_MEMORY_HOTPLUG +void create_section_mapping(unsigned long start, unsigned long end) +{ + BUG_ON(htab_bolt_mapping(start, end, start, + _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX, + mmu_linear_psize)); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ + void __init htab_initialize(void) { unsigned long table, htab_size_bytes; @@ -410,7 +421,7 @@ void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (systemcfg->platform & PLATFORM_LPAR) { + if (platform_is_lpar()) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -431,6 +442,9 @@ void __init htab_initialize(void) /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); + + /* Set SDR1 */ + mtspr(SPRN_SDR1, _SDR1); } mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX; @@ -500,6 +514,12 @@ void __init htab_initialize(void) #undef KB #undef MB +void htab_initialize_secondary(void) +{ + if (!platform_is_lpar()) + mtspr(SPRN_SDR1, _SDR1); +} + /* * Called by asm hashtable.S for doing lazy icache flush */ @@ -581,7 +601,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Handle hugepage regions */ if (unlikely(in_hugepage_area(mm->context, ea))) { DBG_LOW(" -> huge page !\n"); - return hash_huge_page(mm, access, ea, vsid, local); + return hash_huge_page(mm, access, ea, vsid, local, trap); } /* Get PTE and page size from page tables */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 426c269e552e..54131b877da3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -148,43 +148,63 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } +struct slb_flush_info { + struct mm_struct *mm; + u16 newareas; +}; + static void flush_low_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i; - asm volatile("isync" : : : "memory"); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); + + if (current->active_mm != fi->mm) + return; + + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; asm volatile("slbie %0" : : "r" ((i << SID_SHIFT) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } static void flush_high_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i, j; - asm volatile("isync" : : : "memory"); - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); + if (current->active_mm != fi->mm) + return; + + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) asm volatile("slbie %0" :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); + + (j << SID_SHIFT)) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } @@ -229,6 +249,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; + struct slb_flush_info fi; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); @@ -244,19 +265,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) mm->context.low_htlb_areas |= newareas; - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_low_segments, &fi, 0, 1); return 0; } static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) { + struct slb_flush_info fi; unsigned long i; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); @@ -280,22 +302,25 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_high_segments, &fi, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - int err; + int err = 0; if ( (addr+len) < addr ) return -EINVAL; - if ((addr + len) < 0x100000000UL) + if (addr < 0x100000000UL) err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - else + if ((addr + len) > 0x100000000UL) err = open_high_hpage_areas(current->mm, HTLB_AREA_MASK(addr, len)); if (err) { @@ -639,8 +664,36 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, + pte_t pte, int trap) +{ + struct page *page; + int i; + + if (!pfn_valid(pte_pfn(pte))) + return rflags; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + __flush_dcache_icache(page_address(page+i)); + set_bit(PG_arch_1, &page->flags); + } else { + rflags |= HPTE_R_N; + } + } + return rflags; +} + int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local) + unsigned long ea, unsigned long vsid, int local, + unsigned long trap) { pte_t *ptep; unsigned long old_pte, new_pte; @@ -691,6 +744,11 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + /* No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case */ + rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), + trap); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { @@ -703,7 +761,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -754,9 +813,7 @@ repeat: } /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock + * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c index f4ca29cf5364..f9587bcc6a48 100644 --- a/arch/powerpc/mm/imalloc.c +++ b/arch/powerpc/mm/imalloc.c @@ -14,9 +14,10 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/semaphore.h> -#include <asm/imalloc.h> #include <asm/cacheflush.h> +#include "mmu_decl.h" + static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 4612a79dfb6e..7d4b8b5f0606 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -84,9 +84,6 @@ void MMU_init(void); /* XXX should be in current.h -- paulus */ extern struct task_struct *current_set[NR_CPUS]; -char *klimit = _end; -struct device_node *memory_node; - extern int init_bootmem_done; /* diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ce974c83d88a..81cfb0c2ec58 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -20,6 +20,8 @@ * */ +#undef DEBUG + #include <linux/config.h> #include <linux/signal.h> #include <linux/sched.h> @@ -62,7 +64,14 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> -#include <asm/imalloc.h> + +#include "mmu_decl.h" + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif #if PGTABLE_RANGE > USER_VSID_RANGE #warning Limited user VSID range means pagetable space is wasted @@ -72,8 +81,6 @@ #warning TASK_SIZE is smaller than it needs to be. #endif -unsigned long klimit = (unsigned long)_end; - /* max amount of RAM to use */ unsigned long __max_memory; @@ -188,14 +195,14 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) } #ifdef CONFIG_PPC_64K_PAGES -static const int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PGD_TABLE_SIZE +static const unsigned int pgtable_cache_size[3] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE }; static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pte_pmd_cache", "pgd_cache", + "pte_pmd_cache", "pmd_cache", "pgd_cache", }; #else -static const int pgtable_cache_size[2] = { +static const unsigned int pgtable_cache_size[2] = { PTE_TABLE_SIZE, PMD_TABLE_SIZE }; static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { @@ -213,6 +220,8 @@ void pgtable_cache_init(void) int size = pgtable_cache_size[i]; const char *name = pgtable_cache_name[i]; + DBG("Allocating page table cache %s (#%d) " + "for size: %08x...\n", name, i, size); pgtable_cache[i] = kmem_cache_create(name, size, size, SLAB_HWCACHE_ALIGN | diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 9b5aa6808eb8..9584608fd768 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -22,35 +22,38 @@ #include "mmu_decl.h" /* for __max_low_memory */ #endif -struct lmb lmb; - #undef DEBUG +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +struct lmb lmb; + void lmb_dump_all(void) { #ifdef DEBUG unsigned long i; - udbg_printf("lmb_dump_all:\n"); - udbg_printf(" memory.cnt = 0x%lx\n", - lmb.memory.cnt); - udbg_printf(" memory.size = 0x%lx\n", - lmb.memory.size); + DBG("lmb_dump_all:\n"); + DBG(" memory.cnt = 0x%lx\n", lmb.memory.cnt); + DBG(" memory.size = 0x%lx\n", lmb.memory.size); for (i=0; i < lmb.memory.cnt ;i++) { - udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + DBG(" memory.region[0x%x].base = 0x%lx\n", i, lmb.memory.region[i].base); - udbg_printf(" .size = 0x%lx\n", + DBG(" .size = 0x%lx\n", lmb.memory.region[i].size); } - udbg_printf("\n reserved.cnt = 0x%lx\n", - lmb.reserved.cnt); - udbg_printf(" reserved.size = 0x%lx\n", - lmb.reserved.size); + DBG("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt); + DBG(" reserved.size = 0x%lx\n", lmb.reserved.size); for (i=0; i < lmb.reserved.cnt ;i++) { - udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + DBG(" reserved.region[0x%x].base = 0x%lx\n", i, lmb.reserved.region[i].base); - udbg_printf(" .size = 0x%lx\n", + DBG(" .size = 0x%lx\n", lmb.reserved.region[i].size); } #endif /* DEBUG */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 6f55efd9be95..ed6ed2e30dac 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -46,9 +46,7 @@ #include <asm/prom.h> #include <asm/lmb.h> #include <asm/sections.h> -#ifdef CONFIG_PPC64 #include <asm/vdso.h> -#endif #include "mmu_decl.h" @@ -110,6 +108,7 @@ EXPORT_SYMBOL(phys_mem_access_prot); void online_page(struct page *page) { ClearPageReserved(page); + set_page_count(page, 0); free_cold_page(page); totalram_pages++; num_physpages++; @@ -127,6 +126,9 @@ int __devinit add_memory(u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + start += KERNELBASE; + create_section_mapping(start, start + size); + /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; @@ -198,6 +200,8 @@ void show_mem(void) unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { + if (!pfn_valid(pgdat->node_start_pfn + i)) + continue; page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -334,7 +338,7 @@ void __init mem_init(void) struct page *page; unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - num_physpages = max_pfn; /* RAM is assumed contiguous */ + num_physpages = lmb.memory.size >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -346,11 +350,13 @@ void __init mem_init(void) } } #else - max_mapnr = num_physpages; + max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { + if (!pfn_valid(pgdat->node_start_pfn + i)) + continue; page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; @@ -393,10 +399,8 @@ void __init mem_init(void) mem_init_done = 1; -#ifdef CONFIG_PPC64 /* Initialize the vDSO */ vdso_init(); -#endif } /* @@ -491,7 +495,7 @@ EXPORT_SYMBOL(flush_icache_user_range); * We use it to preload an HPTE into the hash table corresponding to * the updated linux PTE. * - * This must always be called with the mm->page_table_lock held + * This must always be called with the pte lock held. */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index a4d7a327c0e5..bea2d21ac6f7 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -33,7 +33,6 @@ extern void invalidate_tlbcam_entry(int index); extern int __map_without_bats; extern unsigned long ioremap_base; -extern unsigned long ioremap_bot; extern unsigned int rtas_data, rtas_size; extern PTE *Hash, *Hash_end; @@ -42,6 +41,7 @@ extern unsigned long Hash_size, Hash_mask; extern unsigned int num_tlbcam_entries; #endif +extern unsigned long ioremap_bot; extern unsigned long __max_low_memory; extern unsigned long __initial_memory_limit; extern unsigned long total_memory; @@ -84,4 +84,16 @@ static inline void flush_HPTE(unsigned context, unsigned long va, else _tlbie(va); } +#else /* CONFIG_PPC64 */ +/* imalloc region types */ +#define IM_REGION_UNUSED 0x1 +#define IM_REGION_SUBSET 0x2 +#define IM_REGION_EXISTS 0x4 +#define IM_REGION_OVERLAP 0x8 +#define IM_REGION_SUPERSET 0x10 + +extern struct vm_struct * im_get_free_area(unsigned long size); +extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int region_type); +extern void im_free(void *addr); #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index da09ba03c424..ba7a3055a9fc 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -17,9 +17,8 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <asm/sparsemem.h> #include <asm/lmb.h> -#include <asm/machdep.h> -#include <asm/abs_addr.h> #include <asm/system.h> #include <asm/smp.h> @@ -28,45 +27,113 @@ static int numa_enabled = 1; static int numa_debug; #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } -#ifdef DEBUG_NUMA -#define ARRAY_INITIALISER -1 -#else -#define ARRAY_INITIALISER 0 -#endif - -int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = - ARRAY_INITIALISER}; -char *numa_memory_lookup_table; +int numa_cpu_lookup_table[NR_CPUS]; cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; -int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; - struct pglist_data *node_data[MAX_NUMNODES]; -bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; + +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(node_data); + +static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; static int min_common_depth; /* - * We need somewhere to store start/span for each node until we have + * We need somewhere to store start/end/node for each region until we have * allocated the real node_data structures. */ +#define MAX_REGIONS (MAX_LMB_REGIONS*2) static struct { - unsigned long node_start_pfn; - unsigned long node_end_pfn; - unsigned long node_present_pages; -} init_node_data[MAX_NUMNODES] __initdata; + unsigned long start_pfn; + unsigned long end_pfn; + int nid; +} init_node_data[MAX_REGIONS] __initdata; -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(numa_cpu_lookup_table); -EXPORT_SYMBOL(numa_memory_lookup_table); -EXPORT_SYMBOL(numa_cpumask_lookup_table); -EXPORT_SYMBOL(nr_cpus_in_node); +int __init early_pfn_to_nid(unsigned long pfn) +{ + unsigned int i; + + for (i = 0; init_node_data[i].end_pfn; i++) { + unsigned long start_pfn = init_node_data[i].start_pfn; + unsigned long end_pfn = init_node_data[i].end_pfn; + + if ((start_pfn <= pfn) && (pfn < end_pfn)) + return init_node_data[i].nid; + } + + return -1; +} + +void __init add_region(unsigned int nid, unsigned long start_pfn, + unsigned long pages) +{ + unsigned int i; + + dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n", + nid, start_pfn, pages); + + for (i = 0; init_node_data[i].end_pfn; i++) { + if (init_node_data[i].nid != nid) + continue; + if (init_node_data[i].end_pfn == start_pfn) { + init_node_data[i].end_pfn += pages; + return; + } + if (init_node_data[i].start_pfn == (start_pfn + pages)) { + init_node_data[i].start_pfn -= pages; + return; + } + } + + /* + * Leave last entry NULL so we dont iterate off the end (we use + * entry.end_pfn to terminate the walk). + */ + if (i >= (MAX_REGIONS - 1)) { + printk(KERN_ERR "WARNING: too many memory regions in " + "numa code, truncating\n"); + return; + } + + init_node_data[i].start_pfn = start_pfn; + init_node_data[i].end_pfn = start_pfn + pages; + init_node_data[i].nid = nid; +} + +/* We assume init_node_data has no overlapping regions */ +void __init get_region(unsigned int nid, unsigned long *start_pfn, + unsigned long *end_pfn, unsigned long *pages_present) +{ + unsigned int i; + + *start_pfn = -1UL; + *end_pfn = *pages_present = 0; + + for (i = 0; init_node_data[i].end_pfn; i++) { + if (init_node_data[i].nid != nid) + continue; + + *pages_present += init_node_data[i].end_pfn - + init_node_data[i].start_pfn; + + if (init_node_data[i].start_pfn < *start_pfn) + *start_pfn = init_node_data[i].start_pfn; + + if (init_node_data[i].end_pfn > *end_pfn) + *end_pfn = init_node_data[i].end_pfn; + } + + /* We didnt find a matching region, return start/end as 0 */ + if (*start_pfn == -1UL) + *start_pfn = 0; +} static inline void map_cpu_to_node(int cpu, int node) { numa_cpu_lookup_table[cpu] = node; - if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { + + if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) cpu_set(cpu, numa_cpumask_lookup_table[node]); - nr_cpus_in_node[node]++; - } } #ifdef CONFIG_HOTPLUG_CPU @@ -78,7 +145,6 @@ static void unmap_cpu_from_node(unsigned long cpu) if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { cpu_clear(cpu, numa_cpumask_lookup_table[node]); - nr_cpus_in_node[node]--; } else { printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", cpu, node); @@ -86,7 +152,7 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static struct device_node * __devinit find_cpu_node(unsigned int cpu) +static struct device_node *find_cpu_node(unsigned int cpu) { unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); struct device_node *cpu_node = NULL; @@ -213,7 +279,7 @@ static int __init get_mem_size_cells(void) return rc; } -static unsigned long read_n_cells(int n, unsigned int **buf) +static unsigned long __init read_n_cells(int n, unsigned int **buf) { unsigned long result = 0; @@ -295,7 +361,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, * or zero. If the returned value of size is 0 the region should be * discarded as it lies wholy above the memory limit. */ -static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) +static unsigned long __init numa_enforce_memory_limit(unsigned long start, + unsigned long size) { /* * We use lmb_end_of_DRAM() in here instead of memory_limit because @@ -320,8 +387,7 @@ static int __init parse_numa_properties(void) struct device_node *cpu = NULL; struct device_node *memory = NULL; int addr_cells, size_cells; - int max_domain = 0; - long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; + int max_domain; unsigned long i; if (numa_enabled == 0) { @@ -329,13 +395,6 @@ static int __init parse_numa_properties(void) return -1; } - numa_memory_lookup_table = - (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); - memset(numa_memory_lookup_table, 0, entries * sizeof(char)); - - for (i = 0; i < entries ; i++) - numa_memory_lookup_table[i] = ARRAY_INITIALISER; - min_common_depth = find_min_common_depth(); dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); @@ -387,9 +446,6 @@ new_range: start = read_n_cells(addr_cells, &memcell_buf); size = read_n_cells(size_cells, &memcell_buf); - start = _ALIGN_DOWN(start, MEMORY_INCREMENT); - size = _ALIGN_UP(size, MEMORY_INCREMENT); - numa_domain = of_node_numa_domain(memory); if (numa_domain >= MAX_NUMNODES) { @@ -403,44 +459,15 @@ new_range: if (max_domain < numa_domain) max_domain = numa_domain; - if (! (size = numa_enforce_memory_limit(start, size))) { + if (!(size = numa_enforce_memory_limit(start, size))) { if (--ranges) goto new_range; else continue; } - /* - * Initialize new node struct, or add to an existing one. - */ - if (init_node_data[numa_domain].node_end_pfn) { - if ((start / PAGE_SIZE) < - init_node_data[numa_domain].node_start_pfn) - init_node_data[numa_domain].node_start_pfn = - start / PAGE_SIZE; - if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > - init_node_data[numa_domain].node_end_pfn) - init_node_data[numa_domain].node_end_pfn = - (start / PAGE_SIZE) + - (size / PAGE_SIZE); - - init_node_data[numa_domain].node_present_pages += - size / PAGE_SIZE; - } else { - node_set_online(numa_domain); - - init_node_data[numa_domain].node_start_pfn = - start / PAGE_SIZE; - init_node_data[numa_domain].node_end_pfn = - init_node_data[numa_domain].node_start_pfn + - size / PAGE_SIZE; - init_node_data[numa_domain].node_present_pages = - size / PAGE_SIZE; - } - - for (i = start ; i < (start+size); i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = - numa_domain; + add_region(numa_domain, start >> PAGE_SHIFT, + size >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -456,32 +483,18 @@ static void __init setup_nonnuma(void) { unsigned long top_of_ram = lmb_end_of_DRAM(); unsigned long total_ram = lmb_phys_mem_size(); - unsigned long i; + unsigned int i; printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); printk(KERN_INFO "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - if (!numa_memory_lookup_table) { - long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; - numa_memory_lookup_table = - (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); - memset(numa_memory_lookup_table, 0, entries * sizeof(char)); - for (i = 0; i < entries ; i++) - numa_memory_lookup_table[i] = ARRAY_INITIALISER; - } - map_cpu_to_node(boot_cpuid, 0); - + for (i = 0; i < lmb.memory.cnt; ++i) + add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, + lmb_size_pages(&lmb.memory, i)); node_set_online(0); - - init_node_data[0].node_start_pfn = 0; - init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; - init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; - - for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; } static void __init dump_numa_topology(void) @@ -499,8 +512,9 @@ static void __init dump_numa_topology(void) count = 0; - for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { - if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { + for (i = 0; i < lmb_end_of_DRAM(); + i += (1 << SECTION_SIZE_BITS)) { + if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { if (count == 0) printk(" 0x%lx", i); ++count; @@ -525,10 +539,12 @@ static void __init dump_numa_topology(void) * * Returns the physical address of the memory. */ -static unsigned long careful_allocation(int nid, unsigned long size, - unsigned long align, unsigned long end) +static void __init *careful_allocation(int nid, unsigned long size, + unsigned long align, + unsigned long end_pfn) { - unsigned long ret = lmb_alloc_base(size, align, end); + int new_nid; + unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); /* retry over all memory */ if (!ret) @@ -542,28 +558,27 @@ static unsigned long careful_allocation(int nid, unsigned long size, * If the memory came from a previously allocated node, we must * retry with the bootmem allocator. */ - if (pa_to_nid(ret) < nid) { - nid = pa_to_nid(ret); - ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), + new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); + if (new_nid < nid) { + ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), size, align, 0); if (!ret) panic("numa.c: cannot allocate %lu bytes on node %d", - size, nid); + size, new_nid); - ret = virt_to_abs(ret); + ret = __pa(ret); dbg("alloc_bootmem %lx %lx\n", ret, size); } - return ret; + return (void *)ret; } void __init do_init_bootmem(void) { int nid; - int addr_cells, size_cells; - struct device_node *memory = NULL; + unsigned int i; static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ @@ -581,99 +596,66 @@ void __init do_init_bootmem(void) register_cpu_notifier(&ppc64_numa_nb); for_each_online_node(nid) { - unsigned long start_paddr, end_paddr; - int i; + unsigned long start_pfn, end_pfn, pages_present; unsigned long bootmem_paddr; unsigned long bootmap_pages; - start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; - end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; + get_region(nid, &start_pfn, &end_pfn, &pages_present); /* Allocate the node structure node local if possible */ - NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, + NODE_DATA(nid) = careful_allocation(nid, sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_paddr); - NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); + SMP_CACHE_BYTES, end_pfn); + NODE_DATA(nid) = __va(NODE_DATA(nid)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); dbg("node %d\n", nid); dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; - NODE_DATA(nid)->node_start_pfn = - init_node_data[nid].node_start_pfn; - NODE_DATA(nid)->node_spanned_pages = - end_paddr - start_paddr; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; if (NODE_DATA(nid)->node_spanned_pages == 0) continue; - dbg("start_paddr = %lx\n", start_paddr); - dbg("end_paddr = %lx\n", end_paddr); + dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); + dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); + bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bootmem_paddr = (unsigned long)careful_allocation(nid, + bootmap_pages << PAGE_SHIFT, + PAGE_SIZE, end_pfn); + memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT); - bootmem_paddr = careful_allocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_paddr); - memset(abs_to_virt(bootmem_paddr), 0, - bootmap_pages << PAGE_SHIFT); dbg("bootmap_paddr = %lx\n", bootmem_paddr); init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, - start_paddr >> PAGE_SHIFT, - end_paddr >> PAGE_SHIFT); + start_pfn, end_pfn); - /* - * We need to do another scan of all memory sections to - * associate memory with the correct node. - */ - addr_cells = get_mem_addr_cells(); - size_cells = get_mem_size_cells(); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { - unsigned long mem_start, mem_size; - int numa_domain, ranges; - unsigned int *memcell_buf; - unsigned int len; - - memcell_buf = (unsigned int *)get_property(memory, "reg", &len); - if (!memcell_buf || len <= 0) - continue; + /* Add free regions on this node */ + for (i = 0; init_node_data[i].end_pfn; i++) { + unsigned long start, end; - ranges = memory->n_addrs; /* ranges in cell */ -new_range: - mem_start = read_n_cells(addr_cells, &memcell_buf); - mem_size = read_n_cells(size_cells, &memcell_buf); - if (numa_enabled) { - numa_domain = of_node_numa_domain(memory); - if (numa_domain >= MAX_NUMNODES) - numa_domain = 0; - } else - numa_domain = 0; - - if (numa_domain != nid) + if (init_node_data[i].nid != nid) continue; - mem_size = numa_enforce_memory_limit(mem_start, mem_size); - if (mem_size) { - dbg("free_bootmem %lx %lx\n", mem_start, mem_size); - free_bootmem_node(NODE_DATA(nid), mem_start, mem_size); - } + start = init_node_data[i].start_pfn << PAGE_SHIFT; + end = init_node_data[i].end_pfn << PAGE_SHIFT; - if (--ranges) /* process all ranges in cell */ - goto new_range; + dbg("free_bootmem %lx %lx\n", start, end - start); + free_bootmem_node(NODE_DATA(nid), start, end - start); } - /* - * Mark reserved regions on this node - */ + /* Mark reserved regions on this node */ for (i = 0; i < lmb.reserved.cnt; i++) { unsigned long physbase = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; + unsigned long start_paddr = start_pfn << PAGE_SHIFT; + unsigned long end_paddr = end_pfn << PAGE_SHIFT; - if (pa_to_nid(physbase) != nid && - pa_to_nid(physbase+size-1) != nid) + if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid && + early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid) continue; if (physbase < end_paddr && @@ -693,46 +675,19 @@ new_range: size); } } - /* - * This loop may look famaliar, but we have to do it again - * after marking our reserved memory to mark memory present - * for sparsemem. - */ - addr_cells = get_mem_addr_cells(); - size_cells = get_mem_size_cells(); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { - unsigned long mem_start, mem_size; - int numa_domain, ranges; - unsigned int *memcell_buf; - unsigned int len; - - memcell_buf = (unsigned int *)get_property(memory, "reg", &len); - if (!memcell_buf || len <= 0) - continue; - ranges = memory->n_addrs; /* ranges in cell */ -new_range2: - mem_start = read_n_cells(addr_cells, &memcell_buf); - mem_size = read_n_cells(size_cells, &memcell_buf); - if (numa_enabled) { - numa_domain = of_node_numa_domain(memory); - if (numa_domain >= MAX_NUMNODES) - numa_domain = 0; - } else - numa_domain = 0; - - if (numa_domain != nid) + /* Add regions into sparsemem */ + for (i = 0; init_node_data[i].end_pfn; i++) { + unsigned long start, end; + + if (init_node_data[i].nid != nid) continue; - mem_size = numa_enforce_memory_limit(mem_start, mem_size); - memory_present(numa_domain, mem_start >> PAGE_SHIFT, - (mem_start + mem_size) >> PAGE_SHIFT); + start = init_node_data[i].start_pfn; + end = init_node_data[i].end_pfn; - if (--ranges) /* process all ranges in cell */ - goto new_range2; + memory_present(nid, start, end); } - } } @@ -746,21 +701,18 @@ void __init paging_init(void) memset(zholes_size, 0, sizeof(zholes_size)); for_each_online_node(nid) { - unsigned long start_pfn; - unsigned long end_pfn; + unsigned long start_pfn, end_pfn, pages_present; - start_pfn = init_node_data[nid].node_start_pfn; - end_pfn = init_node_data[nid].node_end_pfn; + get_region(nid, &start_pfn, &end_pfn, &pages_present); zones_size[ZONE_DMA] = end_pfn - start_pfn; - zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - - init_node_data[nid].node_present_pages; + zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present; dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start_pfn, zholes_size); + free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, + zholes_size); } } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 900842451bd3..2ffca63602c5 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -64,7 +64,8 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> -#include <asm/imalloc.h> + +#include "mmu_decl.h" unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; @@ -122,8 +123,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) * */ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, - mmu_virtual_psize)) - panic("Can't map bolted IO mapping"); + mmu_virtual_psize)) { + printk(KERN_ERR "Failed to do bolted mapping IO " + "memory at %016lx !\n", pa); + return -ENOMEM; + } } return 0; } diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index fa325dbf98fc..51e7951414e5 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -20,6 +20,7 @@ #include <asm/cputable.h> #include <asm/lmb.h> #include <asm/abs_addr.h> +#include <asm/firmware.h> struct stab_entry { unsigned long esid_data; @@ -256,7 +257,7 @@ void stabs_alloc(void) paca[cpu].stab_addr = newstab; paca[cpu].stab_real = virt_to_abs(newstab); - printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx " + printk(KERN_INFO "Segment table for CPU %d at 0x%lx " "virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); } @@ -270,10 +271,23 @@ void stabs_alloc(void) void stab_initialize(unsigned long stab) { unsigned long vsid = get_kernel_vsid(KERNELBASE); + unsigned long stabreal; asm volatile("isync; slbia; isync":::"memory"); make_ste(stab, GET_ESID(KERNELBASE), vsid); /* Order update */ asm volatile("sync":::"memory"); + + /* Set ASR */ + stabreal = get_paca()->stab_real | 0x1ul; + +#ifdef CONFIG_PPC_ISERIES + if (firmware_has_feature(FW_FEATURE_ISERIES)) { + HvCall1(HvCallBaseSetASR, stabreal); + return; + } +#endif /* CONFIG_PPC_ISERIES */ + + mtspr(SPRN_ASR, stabreal); } diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index 6c3dc3c44c86..ad580f3742e5 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -149,6 +149,12 @@ void flush_tlb_mm(struct mm_struct *mm) return; } + /* + * It is safe to go down the mm's list of vmas when called + * from dup_mmap, holding mmap_sem. It would also be safe from + * unmap_region or exit_mmap, but not from vmtruncate on SMP - + * but it seems dup_mmap is the only SMP case which gets here. + */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); FINISH_FLUSH; diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index 53e31b834ace..859d29a0cac5 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -95,7 +95,7 @@ static void pte_free_submit(struct pte_freelist_batch *batch) void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { - /* This is safe as we are holding page_table_lock */ + /* This is safe since tlb_gather_mmu has disabled preemption */ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); @@ -206,7 +206,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) void pte_free_finish(void) { - /* This is safe as we are holding page_table_lock */ + /* This is safe since tlb_gather_mmu has disabled preemption */ struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); if (*batchp == NULL) diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c index 86124a94c9af..26539cda6023 100644 --- a/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c @@ -7,7 +7,7 @@ * Copyright (c) 2004 Freescale Semiconductor, Inc * * Author: Andy Fleming - * Maintainer: Kumar Gala <Kumar.Gala@freescale.com> + * Maintainer: Kumar Gala <galak@kernel.crashing.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index c4ee5478427b..a3401b46f3ba 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -14,7 +14,6 @@ #include <asm/system.h> #include <asm/processor.h> #include <asm/cputable.h> -#include <asm/systemcfg.h> #include <asm/rtas.h> #include <asm/oprofile_impl.h> #include <asm/reg.h> @@ -233,8 +232,7 @@ static unsigned long get_pc(struct pt_regs *regs) mmcra = mfspr(SPRN_MMCRA); /* Were we in the hypervisor? */ - if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) && - (mmcra & MMCRA_SIHV)) + if (platform_is_lpar() && (mmcra & MMCRA_SIHV)) /* function descriptor madness */ return *((unsigned long *)hypervisor_bucket); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index ecd32d5d85f4..dda5f2c72c25 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -257,6 +257,13 @@ void __init chrp_setup_arch(void) if (rtas_token("display-character") >= 0) ppc_md.progress = rtas_progress; + /* use RTAS time-of-day routines if available */ + if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { + ppc_md.get_boot_time = rtas_get_boot_time; + ppc_md.get_rtc_time = rtas_get_rtc_time; + ppc_md.set_rtc_time = rtas_set_rtc_time; + } + #ifdef CONFIG_BOOTX_TEXT if (ppc_md.progress == NULL && boot_text_mapped) ppc_md.progress = btext_progress; @@ -361,7 +368,9 @@ static void __init chrp_find_openpic(void) printk(KERN_INFO "OpenPIC at %lx\n", opaddr); irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ - prom_get_irq_senses(init_senses, NUM_8259_INTERRUPTS, NR_IRQS - 4); + prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4); + /* i8259 cascade is always positive level */ + init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE; iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len); if (iranges == NULL) @@ -503,9 +512,11 @@ void __init chrp_init(void) ppc_md.halt = rtas_halt; ppc_md.time_init = chrp_time_init; + ppc_md.calibrate_decr = chrp_calibrate_decr; + + /* this may get overridden with rtas routines later... */ ppc_md.set_rtc_time = chrp_set_rtc_time; ppc_md.get_rtc_time = chrp_get_rtc_time; - ppc_md.calibrate_decr = chrp_calibrate_decr; #ifdef CONFIG_SMP smp_ops = &chrp_smp_ops; diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index bb2315997d45..b616053bc331 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -34,6 +34,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/mpic.h> +#include <asm/rtas.h> static void __devinit smp_chrp_kick_cpu(int nr) { diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index 9e53535ddb82..737ee5d9f0aa 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -87,7 +87,6 @@ int chrp_set_rtc_time(struct rtc_time *tmarg) chrp_cmos_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - tm.tm_year -= 1900; if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { BIN_TO_BCD(tm.tm_sec); BIN_TO_BCD(tm.tm_min); @@ -156,7 +155,7 @@ void chrp_get_rtc_time(struct rtc_time *tm) BCD_TO_BIN(mon); BCD_TO_BIN(year); } - if ((year += 1900) < 1970) + if (year < 70) year += 100; tm->tm_sec = sec; tm->tm_min = min; diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index bf081b345820..2b54eeb2c899 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -3,7 +3,7 @@ * * Rewrite, cleanup: * - * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * Dynamic DMA mapping support, iSeries-specific parts. * diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index a06603d84a45..a58daa153686 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -42,13 +42,6 @@ #include "irq.h" #include "call_pci.h" -/* This maps virtual irq numbers to real irqs */ -unsigned int virt_irq_to_real_map[NR_IRQS]; - -/* The next available virtual irq number */ -/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */ -static int next_virtual_irq = 2; - static long Pci_Interrupt_Count; static long Pci_Event_Count; @@ -103,6 +96,9 @@ static void intReceived(struct XmPciLpEvent *eventParm, struct pt_regs *regsParm) { int irq; +#ifdef CONFIG_IRQSTACKS + struct thread_info *curtp, *irqtp; +#endif ++Pci_Interrupt_Count; @@ -110,7 +106,20 @@ static void intReceived(struct XmPciLpEvent *eventParm, case XmPciLpEvent_SlotInterrupt: irq = eventParm->hvLpEvent.xCorrelationToken; /* Dispatch the interrupt handlers for this irq */ - ppc_irq_dispatch_handler(regsParm, irq); +#ifdef CONFIG_IRQSTACKS + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[smp_processor_id()]; + if (curtp != irqtp) { + irqtp->task = curtp->task; + irqtp->flags = 0; + call___do_IRQ(irq, regsParm, irqtp); + irqtp->task = NULL; + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + } else +#endif + __do_IRQ(irq, regsParm); HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber, eventParm->eventData.slotInterrupt.subBusNumber, eventParm->eventData.slotInterrupt.deviceId); @@ -310,10 +319,8 @@ static void iSeries_disable_IRQ(unsigned int irq) } /* - * Need to define this so ppc_irq_dispatch_handler will NOT call - * enable_IRQ at the end of interrupt handling. However, this does - * nothing because there is not enough information provided to do - * the EOI HvCall. This is done by XmPciLpEvent.c + * This does nothing because there is not enough information + * provided to do the EOI HvCall. This is done by XmPciLpEvent.c */ static void iSeries_end_IRQ(unsigned int irq) { @@ -336,26 +343,14 @@ static hw_irq_controller iSeries_IRQ_handler = { int __init iSeries_allocate_IRQ(HvBusNumber busNumber, HvSubBusNumber subBusNumber, HvAgentId deviceId) { - unsigned int realirq, virtirq; + int virtirq; + unsigned int realirq; u8 idsel = (deviceId >> 4); u8 function = deviceId & 7; - virtirq = next_virtual_irq++; realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function; - virt_irq_to_real_map[virtirq] = realirq; + virtirq = virt_irq_create_mapping(realirq); irq_desc[virtirq].handler = &iSeries_IRQ_handler; return virtirq; } - -int virt_irq_create_mapping(unsigned int real_irq) -{ - BUG(); /* Don't call this on iSeries, yet */ - - return 0; -} - -void virt_irq_init(void) -{ - return; -} diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S index 09f14522e176..dfe7aa1ba098 100644 --- a/arch/powerpc/platforms/iseries/misc.S +++ b/arch/powerpc/platforms/iseries/misc.S @@ -15,6 +15,7 @@ #include <asm/processor.h> #include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> .text diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c index 4b75131773a6..dafc518fbb83 100644 --- a/arch/powerpc/platforms/iseries/pci.c +++ b/arch/powerpc/platforms/iseries/pci.c @@ -244,10 +244,9 @@ unsigned long __init find_and_init_phbs(void) if (ret == 0) { printk("bus %d appears to exist\n", bus); - phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL); + phb = pcibios_alloc_controller(NULL); if (phb == NULL) return -ENOMEM; - pci_setup_pci_controller(phb); phb->pci_mem_offset = phb->local_number = bus; phb->first_busno = bus; diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 7f8f0cda6a74..da26639190db 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -39,7 +39,7 @@ #include <asm/sections.h> #include <asm/iommu.h> #include <asm/firmware.h> - +#include <asm/system.h> #include <asm/time.h> #include <asm/paca.h> #include <asm/cache.h> @@ -71,7 +71,7 @@ extern void hvlog(char *fmt, ...); #endif /* Function Prototypes */ -static void build_iSeries_Memory_Map(void); +static unsigned long build_iSeries_Memory_Map(void); static void iseries_shared_idle(void); static void iseries_dedicated_idle(void); #ifdef CONFIG_PCI @@ -84,7 +84,6 @@ static void iSeries_pci_final_fixup(void) { } int piranha_simulator; extern int rd_size; /* Defined in drivers/block/rd.c */ -extern unsigned long klimit; extern unsigned long embedded_sysmap_start; extern unsigned long embedded_sysmap_end; @@ -403,9 +402,11 @@ void mschunks_alloc(unsigned long num_chunks) * a table used to translate Linux's physical addresses to these * absolute addresses. Absolute addresses are needed when * communicating with the hypervisor (e.g. to build HPT entries) + * + * Returns the physical memory size */ -static void __init build_iSeries_Memory_Map(void) +static unsigned long __init build_iSeries_Memory_Map(void) { u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; u32 nextPhysChunk; @@ -538,7 +539,7 @@ static void __init build_iSeries_Memory_Map(void) * which should be equal to * nextPhysChunk */ - systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk); + return chunk_to_addr(nextPhysChunk); } /* @@ -546,8 +547,6 @@ static void __init build_iSeries_Memory_Map(void) */ static void __init iSeries_setup_arch(void) { - unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; - if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; printk(KERN_INFO "Using shared processor idle loop\n"); @@ -563,9 +562,6 @@ static void __init iSeries_setup_arch(void) itVpdAreas.xSlicMaxLogicalProcs); printk("Max physical processors = %d\n", itVpdAreas.xSlicMaxPhysicalProcs); - - systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR; - printk("Processor version = %x\n", systemcfg->processor); } static void iSeries_show_cpuinfo(struct seq_file *m) @@ -702,7 +698,6 @@ static void iseries_shared_idle(void) static void iseries_dedicated_idle(void) { - long oldval; set_thread_flag(TIF_POLLING_NRFLAG); while (1) { @@ -929,7 +924,7 @@ void dt_cpus(struct iseries_flat_dt *dt) dt_end_node(dt); } -void build_flat_dt(struct iseries_flat_dt *dt) +void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size) { u64 tmp[2]; @@ -945,7 +940,7 @@ void build_flat_dt(struct iseries_flat_dt *dt) dt_prop_str(dt, "name", "memory"); dt_prop_str(dt, "device_type", "memory"); tmp[0] = 0; - tmp[1] = systemcfg->physicalMemorySize; + tmp[1] = phys_mem_size; dt_prop_u64_list(dt, "reg", tmp, 2); dt_end_node(dt); @@ -965,13 +960,15 @@ void build_flat_dt(struct iseries_flat_dt *dt) void * __init iSeries_early_setup(void) { + unsigned long phys_mem_size; + iSeries_fixup_klimit(); /* * Initialize the table which translate Linux physical addresses to * AS/400 absolute addresses */ - build_iSeries_Memory_Map(); + phys_mem_size = build_iSeries_Memory_Map(); iSeries_get_cmdline(); @@ -981,7 +978,7 @@ void * __init iSeries_early_setup(void) /* Parse early parameters, in particular mem=x */ parse_early_param(); - build_flat_dt(&iseries_dt); + build_flat_dt(&iseries_dt, phys_mem_size); return (void *) __pa(&iseries_dt); } diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 340c21caeae2..f40451da037c 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -326,26 +326,12 @@ static int __init add_bridge(struct device_node *dev) dev->full_name); } - hose = alloc_bootmem(sizeof(struct pci_controller)); + hose = pcibios_alloc_controller(dev); if (hose == NULL) return -ENOMEM; - pci_setup_pci_controller(hose); - - hose->arch_data = dev; hose->first_busno = bus_range ? bus_range[0] : 0; hose->last_busno = bus_range ? bus_range[1] : 0xff; - of_prop = alloc_bootmem(sizeof(struct property) + - sizeof(hose->global_number)); - if (of_prop) { - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(hose->global_number); - of_prop->value = (unsigned char *)&of_prop[1]; - memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number)); - prom_add_property(dev, of_prop); - } - disp_name = NULL; if (device_is_compatible(dev, "u3-agp")) { setup_u3_agp(hose); @@ -380,9 +366,6 @@ void __init maple_pcibios_fixup(void) for_each_pci_dev(dev) pci_read_irq_line(dev); - /* Do the mapping of the IO space */ - phbs_remap_io(); - DBG(" <- maple_pcibios_fixup\n"); } diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 40fc07a8e606..15846cc938ac 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -158,6 +158,11 @@ int maple_set_rtc_time(struct rtc_time *tm) return 0; } +static struct resource rtc_iores = { + .name = "rtc", + .flags = IORESOURCE_BUSY, +}; + unsigned long __init maple_get_boot_time(void) { struct rtc_time tm; @@ -172,7 +177,11 @@ unsigned long __init maple_get_boot_time(void) printk(KERN_INFO "Maple: No device node for RTC, assuming " "legacy address (0x%x)\n", maple_rtc_addr); } - + + rtc_iores.start = maple_rtc_addr; + rtc_iores.end = maple_rtc_addr + 7; + request_resource(&ioport_resource, &rtc_iores); + maple_get_rtc_time(&tm); return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 10f1d942c661..f6e22da2a5da 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1650,11 +1650,19 @@ void pmac_tweak_clock_spreading(int enable) */ if (macio->type == macio_intrepid) { - if (enable) - UN_OUT(UNI_N_CLOCK_SPREADING, 2); - else - UN_OUT(UNI_N_CLOCK_SPREADING, 0); - mdelay(40); + struct device_node *clock = + of_find_node_by_path("/uni-n@f8000000/hw-clock"); + if (clock && get_property(clock, "platform-do-clockspreading", + NULL)) { + printk(KERN_INFO "%sabling clock spreading on Intrepid" + " ASIC\n", enable ? "En" : "Dis"); + if (enable) + UN_OUT(UNI_N_CLOCK_SPREADING, 2); + else + UN_OUT(UNI_N_CLOCK_SPREADING, 0); + mdelay(40); + } + of_node_put(clock); } while (machine_is_compatible("PowerBook5,2") || @@ -1724,6 +1732,9 @@ void pmac_tweak_clock_spreading(int enable) pmac_low_i2c_close(ui2c); break; } + printk(KERN_INFO "%sabling clock spreading on i2c clock chip\n", + enable ? "En" : "Dis"); + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); DBG("write result: %d,", rc); @@ -2362,6 +2373,14 @@ static struct pmac_mb_def pmac_mb_defs[] = { PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, }, + { "PowerBook5,8", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,9", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, { "PowerBook6,1", "PowerBook G4 12\"", PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 8f818d092e2b..443be526cde7 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -640,15 +640,16 @@ static void __init setup_u3_ht(struct pci_controller* hose) * the reg address cell, we shall fix that by killing struct * reg_property and using some accessor functions instead */ - hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, + 0x02000000); /* - * /ht node doesn't expose a "ranges" property, so we "remove" regions that - * have been allocated to AGP. So far, this version of the code doesn't assign - * any of the 0xfxxxxxxx "fine" memory regions to /ht. - * We need to fix that sooner or later by either parsing all child "ranges" - * properties or figuring out the U3 address space decoding logic and - * then read its configuration register (if any). + * /ht node doesn't expose a "ranges" property, so we "remove" + * regions that have been allocated to AGP. So far, this version of + * the code doesn't assign any of the 0xfxxxxxxx "fine" memory regions + * to /ht. We need to fix that sooner or later by either parsing all + * child "ranges" properties or figuring out the U3 address space + * decoding logic and then read its configuration register (if any). */ hose->io_base_phys = 0xf4000000; hose->pci_io_size = 0x00400000; @@ -671,10 +672,10 @@ static void __init setup_u3_ht(struct pci_controller* hose) return; } - /* We "remove" the AGP resources from the resources allocated to HT, that - * is we create "holes". However, that code does assumptions that so far - * happen to be true (cross fingers...), typically that resources in the - * AGP node are properly ordered + /* We "remove" the AGP resources from the resources allocated to HT, + * that is we create "holes". However, that code does assumptions + * that so far happen to be true (cross fingers...), typically that + * resources in the AGP node are properly ordered */ cur = 0; for (i=0; i<3; i++) { @@ -684,23 +685,30 @@ static void __init setup_u3_ht(struct pci_controller* hose) /* We don't care about "fine" resources */ if (res->start >= 0xf0000000) continue; - /* Check if it's just a matter of "shrinking" us in one direction */ + /* Check if it's just a matter of "shrinking" us in one + * direction + */ if (hose->mem_resources[cur].start == res->start) { DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n", - cur, hose->mem_resources[cur].start, res->end + 1); + cur, hose->mem_resources[cur].start, + res->end + 1); hose->mem_resources[cur].start = res->end + 1; continue; } if (hose->mem_resources[cur].end == res->end) { DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n", - cur, hose->mem_resources[cur].end, res->start - 1); + cur, hose->mem_resources[cur].end, + res->start - 1); hose->mem_resources[cur].end = res->start - 1; continue; } /* No, it's not the case, we need a hole */ if (cur == 2) { - /* not enough resources for a hole, we drop part of the range */ - printk(KERN_WARNING "Running out of resources for /ht host !\n"); + /* not enough resources for a hole, we drop part + * of the range + */ + printk(KERN_WARNING "Running out of resources" + " for /ht host !\n"); hose->mem_resources[cur].end = res->start - 1; continue; } @@ -714,17 +722,6 @@ static void __init setup_u3_ht(struct pci_controller* hose) hose->mem_resources[cur-1].end = res->start - 1; } } - -/* XXX this needs to be converged between ppc32 and ppc64... */ -static struct pci_controller * __init pcibios_alloc_controller(void) -{ - struct pci_controller *hose; - - hose = alloc_bootmem(sizeof(struct pci_controller)); - if (hose) - pci_setup_pci_controller(hose); - return hose; -} #endif /* @@ -756,11 +753,16 @@ static int __init add_bridge(struct device_node *dev) #endif bus_range = (int *) get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", - dev->full_name); + printk(KERN_WARNING "Can't get bus-range for %s, assume" + " bus 0\n", dev->full_name); } + /* XXX Different prototypes, to be merged */ +#ifdef CONFIG_PPC64 + hose = pcibios_alloc_controller(dev); +#else hose = pcibios_alloc_controller(); +#endif if (!hose) return -ENOMEM; hose->arch_data = dev; @@ -768,7 +770,7 @@ static int __init add_bridge(struct device_node *dev) hose->last_busno = bus_range ? bus_range[1] : 0xff; disp_name = NULL; -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 if (device_is_compatible(dev, "u3-agp")) { setup_u3_agp(hose); disp_name = "U3-AGP"; @@ -918,9 +920,6 @@ void __init pmac_pci_init(void) PCI_DN(np)->busno = 0xf0; } - /* map in PCI I/O space */ - phbs_remap_io(); - /* pmac_check_ht_link(); */ /* Tell pci.c to not use the common resource allocation mechanism */ diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 83a49e80ac29..90040c49494d 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -74,6 +74,9 @@ static DEFINE_SPINLOCK(pmac_pic_lock); #define GATWICK_IRQ_POOL_SIZE 10 static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; +#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) +static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; + /* * Mark an irq as "lost". This is only used on the pmac * since it can lose interrupts (see pmac_set_irq_mask). diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index e1f9443cc872..fb2a7c798e82 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -34,6 +34,7 @@ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/cpu.h> +#include <linux/compiler.h> #include <asm/ptrace.h> #include <asm/atomic.h> @@ -305,9 +306,19 @@ static int __init smp_psurge_probe(void) psurge_start = ioremap(PSURGE_START, 4); psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); - /* this is not actually strictly necessary -- paulus. */ - for (i = 1; i < ncpus; ++i) - smp_hw_index[i] = i; + /* + * This is necessary because OF doesn't know about the + * secondary cpu(s), and thus there aren't nodes in the + * device tree for them, and smp_setup_cpu_maps hasn't + * set their bits in cpu_possible_map and cpu_present_map. + */ + if (ncpus > NR_CPUS) + ncpus = NR_CPUS; + for (i = 1; i < ncpus ; ++i) { + cpu_set(i, cpu_present_map); + cpu_set(i, cpu_possible_map); + set_hard_smp_processor_id(i, i); + } if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); @@ -348,6 +359,7 @@ static void __init psurge_dual_sync_tb(int cpu_nr) int t; set_dec(tb_ticks_per_jiffy); + /* XXX fixme */ set_tb(0, 0); last_jiffy_stamp(cpu_nr) = 0; @@ -363,8 +375,6 @@ static void __init psurge_dual_sync_tb(int cpu_nr) /* now interrupt the secondary, starting both TBs */ psurge_set_ipi(1); - - smp_tb_synchronized = 1; } static struct irqaction psurge_irqaction = { @@ -622,12 +632,12 @@ void smp_core99_give_timebase(void) mb(); /* wait for the secondary to have taken it */ - for (t = 100000; t > 0 && sec_tb_reset; --t) - udelay(10); + /* note: can't use udelay here, since it needs the timebase running */ + for (t = 10000000; t > 0 && sec_tb_reset; --t) + barrier(); if (sec_tb_reset) + /* XXX BUG_ON here? */ printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n"); - else - smp_tb_synchronized = 1; /* Now, restart the timebase by leaving the GPIO to an open collector */ pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); @@ -810,19 +820,9 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr) } -/* Core99 Macs (dual G4s and G5s) */ -struct smp_ops_t core99_smp_ops = { - .message_pass = smp_mpic_message_pass, - .probe = smp_core99_probe, - .kick_cpu = smp_core99_kick_cpu, - .setup_cpu = smp_core99_setup_cpu, - .give_timebase = smp_core99_give_timebase, - .take_timebase = smp_core99_take_timebase, -}; - #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32) -int __cpu_disable(void) +int smp_core99_cpu_disable(void) { cpu_clear(smp_processor_id(), cpu_online_map); @@ -846,7 +846,7 @@ void cpu_die(void) low_cpu_die(); } -void __cpu_die(unsigned int cpu) +void smp_core99_cpu_die(unsigned int cpu) { int timeout; @@ -858,8 +858,21 @@ void __cpu_die(unsigned int cpu) } msleep(1); } - cpu_callin_map[cpu] = 0; cpu_dead[cpu] = 0; } #endif + +/* Core99 Macs (dual G4s and G5s) */ +struct smp_ops_t core99_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_core99_probe, + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_core99_give_timebase, + .take_timebase = smp_core99_take_timebase, +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32) + .cpu_disable = smp_core99_cpu_disable, + .cpu_die = smp_core99_cpu_die, +#endif +}; diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 5947b21a8588..feb0a94e7819 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -102,7 +102,7 @@ static unsigned long from_rtc_time(struct rtc_time *tm) static unsigned long cuda_get_time(void) { struct adb_request req; - unsigned long now; + unsigned int now; if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) return 0; @@ -113,7 +113,7 @@ static unsigned long cuda_get_time(void) req.reply_len); now = (req.reply[3] << 24) + (req.reply[4] << 16) + (req.reply[5] << 8) + req.reply[6]; - return now - RTC_OFFSET; + return ((unsigned long)now) - RTC_OFFSET; } #define cuda_get_rtc_time(tm) to_rtc_time(cuda_get_time(), (tm)) @@ -146,7 +146,7 @@ static int cuda_set_rtc_time(struct rtc_time *tm) static unsigned long pmu_get_time(void) { struct adb_request req; - unsigned long now; + unsigned int now; if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) return 0; @@ -156,7 +156,7 @@ static unsigned long pmu_get_time(void) req.reply_len); now = (req.reply[0] << 24) + (req.reply[1] << 16) + (req.reply[2] << 8) + req.reply[3]; - return now - RTC_OFFSET; + return ((unsigned long)now) - RTC_OFFSET; } #define pmu_get_rtc_time(tm) to_rtc_time(pmu_get_time(), (tm)) @@ -199,6 +199,7 @@ static unsigned long smu_get_time(void) #define smu_set_rtc_time(tm, spin) 0 #endif +/* Can't be __init, it's called when suspending and resuming */ unsigned long pmac_get_boot_time(void) { /* Get the time from the RTC, used only at boot time */ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index b9938fece781..06d5ef501218 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -3,3 +3,8 @@ obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o +obj-$(CONFIG_SCANLOG) += scanlog.o +obj-$(CONFIG_EEH) += eeh.o eeh_event.o + +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_HVCS) += hvcserver.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c new file mode 100644 index 000000000000..c8d2a40dc5b4 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -0,0 +1,1213 @@ +/* + * eeh.c + * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/rbtree.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> +#include <asm/rtas.h> + +#undef DEBUG + +/** Overview: + * EEH, or "Extended Error Handling" is a PCI bridge technology for + * dealing with PCI bus errors that can't be dealt with within the + * usual PCI framework, except by check-stopping the CPU. Systems + * that are designed for high-availability/reliability cannot afford + * to crash due to a "mere" PCI error, thus the need for EEH. + * An EEH-capable bridge operates by converting a detected error + * into a "slot freeze", taking the PCI adapter off-line, making + * the slot behave, from the OS'es point of view, as if the slot + * were "empty": all reads return 0xff's and all writes are silently + * ignored. EEH slot isolation events can be triggered by parity + * errors on the address or data busses (e.g. during posted writes), + * which in turn might be caused by low voltage on the bus, dust, + * vibration, humidity, radioactivity or plain-old failed hardware. + * + * Note, however, that one of the leading causes of EEH slot + * freeze events are buggy device drivers, buggy device microcode, + * or buggy device hardware. This is because any attempt by the + * device to bus-master data to a memory address that is not + * assigned to the device will trigger a slot freeze. (The idea + * is to prevent devices-gone-wild from corrupting system memory). + * Buggy hardware/drivers will have a miserable time co-existing + * with EEH. + * + * Ideally, a PCI device driver, when suspecting that an isolation + * event has occured (e.g. by reading 0xff's), will then ask EEH + * whether this is the case, and then take appropriate steps to + * reset the PCI slot, the PCI device, and then resume operations. + * However, until that day, the checking is done here, with the + * eeh_check_failure() routine embedded in the MMIO macros. If + * the slot is found to be isolated, an "EEH Event" is synthesized + * and sent out for processing. + */ + +/* If a device driver keeps reading an MMIO register in an interrupt + * handler after a slot isolation event has occurred, we assume it + * is broken and panic. This sets the threshold for how many read + * attempts we allow before panicking. + */ +#define EEH_MAX_FAILS 100000 + +/* Misc forward declaraions */ +static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn); + +/* RTAS tokens */ +static int ibm_set_eeh_option; +static int ibm_set_slot_reset; +static int ibm_read_slot_reset_state; +static int ibm_read_slot_reset_state2; +static int ibm_slot_error_detail; + +int eeh_subsystem_enabled; +EXPORT_SYMBOL(eeh_subsystem_enabled); + +/* Lock to avoid races due to multiple reports of an error */ +static DEFINE_SPINLOCK(confirm_error_lock); + +/* Buffer for reporting slot-error-detail rtas calls */ +static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(slot_errbuf_lock); +static int eeh_error_buf_size; + +/* System monitoring statistics */ +static DEFINE_PER_CPU(unsigned long, no_device); +static DEFINE_PER_CPU(unsigned long, no_dn); +static DEFINE_PER_CPU(unsigned long, no_cfg_addr); +static DEFINE_PER_CPU(unsigned long, ignored_check); +static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); +static DEFINE_PER_CPU(unsigned long, false_positives); +static DEFINE_PER_CPU(unsigned long, ignored_failures); +static DEFINE_PER_CPU(unsigned long, slot_resets); + +/** + * The pci address cache subsystem. This subsystem places + * PCI device address resources into a red-black tree, sorted + * according to the address range, so that given only an i/o + * address, the corresponding PCI device can be **quickly** + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. + * + * Currently, the only customer of this code is the EEH subsystem; + * thus, this code has been somewhat tailored to suit EEH better. + * In particular, the cache does *not* hold the addresses of devices + * for which EEH is not enabled. + * + * (Implementation Note: The RB tree seems to be better/faster + * than any hash algo I could think of for this problem, even + * with the penalty of slow pointer chases for d-cache misses). + */ +struct pci_io_addr_range +{ + struct rb_node rb_node; + unsigned long addr_lo; + unsigned long addr_hi; + struct pci_dev *pcidev; + unsigned int flags; +}; + +static struct pci_io_addr_cache +{ + struct rb_root rb_root; + spinlock_t piar_lock; +} pci_io_addr_cache_root; + +static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) +{ + struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; + + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (addr < piar->addr_lo) { + n = n->rb_left; + } else { + if (addr > piar->addr_hi) { + n = n->rb_right; + } else { + pci_dev_get(piar->pcidev); + return piar->pcidev; + } + } + } + + return NULL; +} + +/** + * pci_get_device_by_addr - Get device, given only address + * @addr: mmio (PIO) phys address or i/o port number + * + * Given an mmio phys address, or a port number, find a pci device + * that implements this address. Be sure to pci_dev_put the device + * when finished. I/O port numbers are assumed to be offset + * from zero (that is, they do *not* have pci_io_addr added in). + * It is safe to call this function within an interrupt. + */ +static struct pci_dev *pci_get_device_by_addr(unsigned long addr) +{ + struct pci_dev *dev; + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + dev = __pci_get_device_by_addr(addr); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); + return dev; +} + +#ifdef DEBUG +/* + * Handy-dandy debug print routine, does nothing more + * than print out the contents of our addr cache. + */ +static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +{ + struct rb_node *n; + int cnt = 0; + + n = rb_first(&cache->rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", + (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + cnt++; + n = rb_next(n); + } +} +#endif + +/* Insert address range into the rb tree. */ +static struct pci_io_addr_range * +pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, + unsigned long ahi, unsigned int flags) +{ + struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct pci_io_addr_range *piar; + + /* Walk tree, find a place to insert into tree */ + while (*p) { + parent = *p; + piar = rb_entry(parent, struct pci_io_addr_range, rb_node); + if (ahi < piar->addr_lo) { + p = &parent->rb_left; + } else if (alo > piar->addr_hi) { + p = &parent->rb_right; + } else { + if (dev != piar->pcidev || + alo != piar->addr_lo || ahi != piar->addr_hi) { + printk(KERN_WARNING "PIAR: overlapping address range\n"); + } + return piar; + } + } + piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + if (!piar) + return NULL; + + piar->addr_lo = alo; + piar->addr_hi = ahi; + piar->pcidev = dev; + piar->flags = flags; + +#ifdef DEBUG + printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", + alo, ahi, pci_name (dev)); +#endif + + rb_link_node(&piar->rb_node, parent, p); + rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); + + return piar; +} + +static void __pci_addr_cache_insert_device(struct pci_dev *dev) +{ + struct device_node *dn; + struct pci_dn *pdn; + int i; + int inserted = 0; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); + return; + } + + /* Skip any devices for which EEH is not enabled. */ + pdn = PCI_DN(dn); + if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || + pdn->eeh_mode & EEH_MODE_NOCHECK) { +#ifdef DEBUG + printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", + pci_name(dev), pdn->node->full_name); +#endif + return; + } + + /* The cache holds a reference to the device... */ + pci_dev_get(dev); + + /* Walk resources on this device, poke them into the tree */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long start = pci_resource_start(dev,i); + unsigned long end = pci_resource_end(dev,i); + unsigned int flags = pci_resource_flags(dev,i); + + /* We are interested only bus addresses, not dma or other stuff */ + if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if (start == 0 || ~start == 0 || end == 0 || ~end == 0) + continue; + pci_addr_cache_insert(dev, start, end, flags); + inserted = 1; + } + + /* If there was nothing to add, the cache has no reference... */ + if (!inserted) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_insert_device - Add a device to the address cache + * @dev: PCI device whose I/O addresses we are interested in. + * + * In order to support the fast lookup of devices based on addresses, + * we maintain a cache of devices that can be quickly searched. + * This routine adds a device to that cache. + */ +static void pci_addr_cache_insert_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_insert_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +{ + struct rb_node *n; + int removed = 0; + +restart: + n = rb_first(&pci_io_addr_cache_root.rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (piar->pcidev == dev) { + rb_erase(n, &pci_io_addr_cache_root.rb_root); + removed = 1; + kfree(piar); + goto restart; + } + n = rb_next(n); + } + + /* The cache no longer holds its reference to this device... */ + if (removed) + pci_dev_put(dev); +} + +/** + * pci_addr_cache_remove_device - remove pci device from addr cache + * @dev: device to remove + * + * Remove a device from the addr-cache tree. + * This is potentially expensive, since it will walk + * the tree multiple times (once per resource). + * But so what; device removal doesn't need to be that fast. + */ +static void pci_addr_cache_remove_device(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __pci_addr_cache_remove_device(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +/** + * pci_addr_cache_build - Build a cache of I/O addresses + * + * Build a cache of pci i/o addresses. This cache will be used to + * find the pci device that corresponds to a given address. + * This routine scans all pci busses to build the cache. + * Must be run late in boot process, after the pci controllers + * have been scaned for devices (after all device resources are known). + */ +void __init pci_addr_cache_build(void) +{ + struct device_node *dn; + struct pci_dev *dev = NULL; + + if (!eeh_subsystem_enabled) + return; + + spin_lock_init(&pci_io_addr_cache_root.piar_lock); + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + /* Ignore PCI bridges ( XXX why ??) */ + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { + continue; + } + pci_addr_cache_insert_device(dev); + + /* Save the BAR's; firmware doesn't restore these after EEH reset */ + dn = pci_device_to_OF_node(dev); + eeh_save_bars(dev, PCI_DN(dn)); + } + +#ifdef DEBUG + /* Verify tree built up above, echo back the list of addrs. */ + pci_addr_cache_print(&pci_io_addr_cache_root); +#endif +} + +/* --------------------------------------------------------------- */ +/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ + +void eeh_slot_error_detail (struct pci_dn *pdn, int severity) +{ + unsigned long flags; + int rc; + + /* Log the error with the rtas logger */ + spin_lock_irqsave(&slot_errbuf_lock, flags); + memset(slot_errbuf, 0, eeh_error_buf_size); + + rc = rtas_call(ibm_slot_error_detail, + 8, 1, NULL, pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), NULL, 0, + virt_to_phys(slot_errbuf), + eeh_error_buf_size, + severity); + + if (rc == 0) + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); + spin_unlock_irqrestore(&slot_errbuf_lock, flags); +} + +/** + * read_slot_reset_state - Read the reset state of a device node's slot + * @dn: device node to read + * @rets: array to return results in + */ +static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) +{ + int token, outputs; + + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { + token = ibm_read_slot_reset_state2; + outputs = 4; + } else { + token = ibm_read_slot_reset_state; + rets[2] = 0; /* fake PE Unavailable info */ + outputs = 3; + } + + return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); +} + +/** + * eeh_token_to_phys - convert EEH address token to phys address + * @token i/o token, should be address in the form 0xA.... + */ +static inline unsigned long eeh_token_to_phys(unsigned long token) +{ + pte_t *ptep; + unsigned long pa; + + ptep = find_linux_pte(init_mm.pgd, token); + if (!ptep) + return token; + pa = pte_pfn(*ptep) << PAGE_SHIFT; + + return pa | (token & (PAGE_SIZE-1)); +} + +/** + * Return the "partitionable endpoint" (pe) under which this device lies + */ +static struct device_node * find_device_pe(struct device_node *dn) +{ + while ((dn->parent) && PCI_DN(dn->parent) && + (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { + dn = dn->parent; + } + return dn; +} + +/** Mark all devices that are peers of this device as failed. + * Mark the device driver too, so that it can see the failure + * immediately; this is critical, since some drivers poll + * status registers in interrupts ... If a driver is polling, + * and the slot is frozen, then the driver can deadlock in + * an interrupt context, which is bad. + */ + +static void __eeh_mark_slot (struct device_node *dn, int mode_flag) +{ + while (dn) { + if (PCI_DN(dn)) { + PCI_DN(dn)->eeh_mode |= mode_flag; + + if (dn->child) + __eeh_mark_slot (dn->child, mode_flag); + } + dn = dn->sibling; + } +} + +void eeh_mark_slot (struct device_node *dn, int mode_flag) +{ + dn = find_device_pe (dn); + PCI_DN(dn)->eeh_mode |= mode_flag; + __eeh_mark_slot (dn->child, mode_flag); +} + +static void __eeh_clear_slot (struct device_node *dn, int mode_flag) +{ + while (dn) { + if (PCI_DN(dn)) { + PCI_DN(dn)->eeh_mode &= ~mode_flag; + PCI_DN(dn)->eeh_check_count = 0; + if (dn->child) + __eeh_clear_slot (dn->child, mode_flag); + } + dn = dn->sibling; + } +} + +void eeh_clear_slot (struct device_node *dn, int mode_flag) +{ + unsigned long flags; + spin_lock_irqsave(&confirm_error_lock, flags); + dn = find_device_pe (dn); + PCI_DN(dn)->eeh_mode &= ~mode_flag; + PCI_DN(dn)->eeh_check_count = 0; + __eeh_clear_slot (dn->child, mode_flag); + spin_unlock_irqrestore(&confirm_error_lock, flags); +} + +/** + * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze + * @dn device node + * @dev pci device, if known + * + * Check for an EEH failure for the given device node. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze. This routine + * will query firmware for the EEH status. + * + * Returns 0 if there has not been an EEH error; otherwise returns + * a non-zero value and queues up a slot isolation event notification. + * + * It is safe to call this routine in an interrupt context. + */ +int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) +{ + int ret; + int rets[3]; + unsigned long flags; + struct pci_dn *pdn; + int rc = 0; + + __get_cpu_var(total_mmio_ffs)++; + + if (!eeh_subsystem_enabled) + return 0; + + if (!dn) { + __get_cpu_var(no_dn)++; + return 0; + } + pdn = PCI_DN(dn); + + /* Access to IO BARs might get this far and still not want checking. */ + if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || + pdn->eeh_mode & EEH_MODE_NOCHECK) { + __get_cpu_var(ignored_check)++; +#ifdef DEBUG + printk ("EEH:ignored check (%x) for %s %s\n", + pdn->eeh_mode, pci_name (dev), dn->full_name); +#endif + return 0; + } + + if (!pdn->eeh_config_addr) { + __get_cpu_var(no_cfg_addr)++; + return 0; + } + + /* If we already have a pending isolation event for this + * slot, we know it's bad already, we don't need to check. + * Do this checking under a lock; as multiple PCI devices + * in one slot might report errors simultaneously, and we + * only want one error recovery routine running. + */ + spin_lock_irqsave(&confirm_error_lock, flags); + rc = 1; + if (pdn->eeh_mode & EEH_MODE_ISOLATED) { + pdn->eeh_check_count ++; + if (pdn->eeh_check_count >= EEH_MAX_FAILS) { + printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n", + pdn->eeh_check_count); + dump_stack(); + + /* re-read the slot reset state */ + if (read_slot_reset_state(pdn, rets) != 0) + rets[0] = -1; /* reset state unknown */ + + /* If we are here, then we hit an infinite loop. Stop. */ + panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev)); + } + goto dn_unlock; + } + + /* + * Now test for an EEH failure. This is VERY expensive. + * Note that the eeh_config_addr may be a parent device + * in the case of a device behind a bridge, or it may be + * function zero of a multi-function device. + * In any case they must share a common PHB. + */ + ret = read_slot_reset_state(pdn, rets); + + /* If the call to firmware failed, punt */ + if (ret != 0) { + printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", + ret, dn->full_name); + __get_cpu_var(false_positives)++; + rc = 0; + goto dn_unlock; + } + + /* If EEH is not supported on this device, punt. */ + if (rets[1] != 1) { + printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", + ret, dn->full_name); + __get_cpu_var(false_positives)++; + rc = 0; + goto dn_unlock; + } + + /* If not the kind of error we know about, punt. */ + if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { + __get_cpu_var(false_positives)++; + rc = 0; + goto dn_unlock; + } + + /* Note that config-io to empty slots may fail; + * we recognize empty because they don't have children. */ + if ((rets[0] == 5) && (dn->child == NULL)) { + __get_cpu_var(false_positives)++; + rc = 0; + goto dn_unlock; + } + + __get_cpu_var(slot_resets)++; + + /* Avoid repeated reports of this failure, including problems + * with other functions on this device, and functions under + * bridges. */ + eeh_mark_slot (dn, EEH_MODE_ISOLATED); + spin_unlock_irqrestore(&confirm_error_lock, flags); + + eeh_send_failure_event (dn, dev, rets[0], rets[2]); + + /* Most EEH events are due to device driver bugs. Having + * a stack trace will help the device-driver authors figure + * out what happened. So print that out. */ + if (rets[0] != 5) dump_stack(); + return 1; + +dn_unlock: + spin_unlock_irqrestore(&confirm_error_lock, flags); + return rc; +} + +EXPORT_SYMBOL_GPL(eeh_dn_check_failure); + +/** + * eeh_check_failure - check if all 1's data is due to EEH slot freeze + * @token i/o token, should be address in the form 0xA.... + * @val value, should be all 1's (XXX why do we need this arg??) + * + * Check for an EEH failure at the given token address. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze event. This routine + * will query firmware for the EEH status. + * + * Note this routine is safe to call in an interrupt context. + */ +unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +{ + unsigned long addr; + struct pci_dev *dev; + struct device_node *dn; + + /* Finding the phys addr + pci device; this is pretty quick. */ + addr = eeh_token_to_phys((unsigned long __force) token); + dev = pci_get_device_by_addr(addr); + if (!dev) { + __get_cpu_var(no_device)++; + return val; + } + + dn = pci_device_to_OF_node(dev); + eeh_dn_check_failure (dn, dev); + + pci_dev_put(dev); + return val; +} + +EXPORT_SYMBOL(eeh_check_failure); + +/* ------------------------------------------------------------- */ +/* The code below deals with error recovery */ + +/** Return negative value if a permanent error, else return + * a number of milliseconds to wait until the PCI slot is + * ready to be used. + */ +static int +eeh_slot_availability(struct pci_dn *pdn) +{ + int rc; + int rets[3]; + + rc = read_slot_reset_state(pdn, rets); + + if (rc) return rc; + + if (rets[1] == 0) return -1; /* EEH is not supported */ + if (rets[0] == 0) return 0; /* Oll Korrect */ + if (rets[0] == 5) { + if (rets[2] == 0) return -1; /* permanently unavailable */ + return rets[2]; /* number of millisecs to wait */ + } + return -1; +} + +/** rtas_pci_slot_reset raises/lowers the pci #RST line + * state: 1/0 to raise/lower the #RST + * + * Clear the EEH-frozen condition on a slot. This routine + * asserts the PCI #RST line if the 'state' argument is '1', + * and drops the #RST line if 'state is '0'. This routine is + * safe to call in an interrupt context. + * + */ + +static void +rtas_pci_slot_reset(struct pci_dn *pdn, int state) +{ + int rc; + + BUG_ON (pdn==NULL); + + if (!pdn->phb) { + printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", + pdn->node->full_name); + return; + } + + rc = rtas_call(ibm_set_slot_reset,4,1, NULL, + pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), + state); + if (rc) { + printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n", + rc, state, pdn->node->full_name); + return; + } +} + +/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second + * dn -- device node to be reset. + */ + +void +rtas_set_slot_reset(struct pci_dn *pdn) +{ + int i, rc; + + rtas_pci_slot_reset (pdn, 1); + + /* The PCI bus requires that the reset be held high for at least + * a 100 milliseconds. We wait a bit longer 'just in case'. */ + +#define PCI_BUS_RST_HOLD_TIME_MSEC 250 + msleep (PCI_BUS_RST_HOLD_TIME_MSEC); + + /* We might get hit with another EEH freeze as soon as the + * pci slot reset line is dropped. Make sure we don't miss + * these, and clear the flag now. */ + eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED); + + rtas_pci_slot_reset (pdn, 0); + + /* After a PCI slot has been reset, the PCI Express spec requires + * a 1.5 second idle time for the bus to stabilize, before starting + * up traffic. */ +#define PCI_BUS_SETTLE_TIME_MSEC 1800 + msleep (PCI_BUS_SETTLE_TIME_MSEC); + + /* Now double check with the firmware to make sure the device is + * ready to be used; if not, wait for recovery. */ + for (i=0; i<10; i++) { + rc = eeh_slot_availability (pdn); + if (rc <= 0) break; + + msleep (rc+100); + } +} + +/* ------------------------------------------------------- */ +/** Save and restore of PCI BARs + * + * Although firmware will set up BARs during boot, it doesn't + * set up device BAR's after a device reset, although it will, + * if requested, set up bridge configuration. Thus, we need to + * configure the PCI devices ourselves. + */ + +/** + * __restore_bars - Restore the Base Address Registers + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. + */ +static inline void __restore_bars (struct pci_dn *pdn) +{ + int i; + + if (NULL==pdn->phb) return; + for (i=4; i<10; i++) { + rtas_write_config(pdn, i*4, 4, pdn->config_space[i]); + } + + /* 12 == Expansion ROM Address */ + rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]); + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) + + rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + + rtas_write_config (pdn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + + /* max latency, min grant, interrupt pin and line */ + rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); +} + +/** + * eeh_restore_bars - restore the PCI config space info + * + * This routine performs a recursive walk to the children + * of this device as well. + */ +void eeh_restore_bars(struct pci_dn *pdn) +{ + struct device_node *dn; + if (!pdn) + return; + + if (! pdn->eeh_is_bridge) + __restore_bars (pdn); + + dn = pdn->node->child; + while (dn) { + eeh_restore_bars (PCI_DN(dn)); + dn = dn->sibling; + } +} + +/** + * eeh_save_bars - save device bars + * + * Save the values of the device bars. Unlike the restore + * routine, this routine is *not* recursive. This is because + * PCI devices are added individuallly; but, for the restore, + * an entire slot is reset at a time. + */ +static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn) +{ + int i; + + if (!pdev || !pdn ) + return; + + for (i = 0; i < 16; i++) + pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]); + + if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + pdn->eeh_is_bridge = 1; +} + +void +rtas_configure_bridge(struct pci_dn *pdn) +{ + int token = rtas_token ("ibm,configure-bridge"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return; + rc = rtas_call(token,3,1, NULL, + pdn->eeh_config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + if (rc) { + printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", + rc, pdn->node->full_name); + } +} + +/* ------------------------------------------------------------- */ +/* The code below deals with enabling EEH for devices during the + * early boot sequence. EEH must be enabled before any PCI probing + * can be done. + */ + +#define EEH_ENABLE 1 + +struct eeh_early_enable_info { + unsigned int buid_hi; + unsigned int buid_lo; +}; + +/* Enable eeh for the given device node. */ +static void *early_enable_eeh(struct device_node *dn, void *data) +{ + struct eeh_early_enable_info *info = data; + int ret; + char *status = get_property(dn, "status", NULL); + u32 *class_code = (u32 *)get_property(dn, "class-code", NULL); + u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL); + u32 *device_id = (u32 *)get_property(dn, "device-id", NULL); + u32 *regs; + int enable; + struct pci_dn *pdn = PCI_DN(dn); + + pdn->eeh_mode = 0; + pdn->eeh_check_count = 0; + pdn->eeh_freeze_count = 0; + + if (status && strcmp(status, "ok") != 0) + return NULL; /* ignore devices with bad status */ + + /* Ignore bad nodes. */ + if (!class_code || !vendor_id || !device_id) + return NULL; + + /* There is nothing to check on PCI to ISA bridges */ + if (dn->type && !strcmp(dn->type, "isa")) { + pdn->eeh_mode |= EEH_MODE_NOCHECK; + return NULL; + } + + /* + * Now decide if we are going to "Disable" EEH checking + * for this device. We still run with the EEH hardware active, + * but we won't be checking for ff's. This means a driver + * could return bad data (very bad!), an interrupt handler could + * hang waiting on status bits that won't change, etc. + * But there are a few cases like display devices that make sense. + */ + enable = 1; /* i.e. we will do checking */ + if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) + enable = 0; + + if (!enable) + pdn->eeh_mode |= EEH_MODE_NOCHECK; + + /* Ok... see if this device supports EEH. Some do, some don't, + * and the only way to find out is to check each and every one. */ + regs = (u32 *)get_property(dn, "reg", NULL); + if (regs) { + /* First register entry is addr (00BBSS00) */ + /* Try to enable eeh */ + ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, + regs[0], info->buid_hi, info->buid_lo, + EEH_ENABLE); + + if (ret == 0) { + eeh_subsystem_enabled = 1; + pdn->eeh_mode |= EEH_MODE_SUPPORTED; + pdn->eeh_config_addr = regs[0]; +#ifdef DEBUG + printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); +#endif + } else { + + /* This device doesn't support EEH, but it may have an + * EEH parent, in which case we mark it as supported. */ + if (dn->parent && PCI_DN(dn->parent) + && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { + /* Parent supports EEH. */ + pdn->eeh_mode |= EEH_MODE_SUPPORTED; + pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; + return NULL; + } + } + } else { + printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", + dn->full_name); + } + + return NULL; +} + +/* + * Initialize EEH by trying to enable it for all of the adapters in the system. + * As a side effect we can determine here if eeh is supported at all. + * Note that we leave EEH on so failed config cycles won't cause a machine + * check. If a user turns off EEH for a particular adapter they are really + * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't + * grant access to a slot if EEH isn't enabled, and so we always enable + * EEH for all slots/all devices. + * + * The eeh-force-off option disables EEH checking globally, for all slots. + * Even if force-off is set, the EEH hardware is still enabled, so that + * newer systems can boot. + */ +void __init eeh_init(void) +{ + struct device_node *phb, *np; + struct eeh_early_enable_info info; + + spin_lock_init(&confirm_error_lock); + spin_lock_init(&slot_errbuf_lock); + + np = of_find_node_by_path("/rtas"); + if (np == NULL) + return; + + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); + + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) + return; + + eeh_error_buf_size = rtas_token("rtas-error-log-max"); + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { + eeh_error_buf_size = 1024; + } + if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { + printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " + "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; + } + + /* Enable EEH for all adapters. Note that eeh requires buid's */ + for (phb = of_find_node_by_name(NULL, "pci"); phb; + phb = of_find_node_by_name(phb, "pci")) { + unsigned long buid; + + buid = get_phb_buid(phb); + if (buid == 0 || PCI_DN(phb) == NULL) + continue; + + info.buid_lo = BUID_LO(buid); + info.buid_hi = BUID_HI(buid); + traverse_pci_devices(phb, early_enable_eeh, &info); + } + + if (eeh_subsystem_enabled) + printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + printk(KERN_WARNING "EEH: No capable adapters found\n"); +} + +/** + * eeh_add_device_early - enable EEH for the indicated device_node + * @dn: device node for which to set up EEH + * + * This routine must be used to perform EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + * This routine must be called before any i/o is performed to the + * adapter (inluding any config-space i/o). + * Whether this actually enables EEH or not for this device depends + * on the CEC architecture, type of the device, on earlier boot + * command-line arguments & etc. + */ +void eeh_add_device_early(struct device_node *dn) +{ + struct pci_controller *phb; + struct eeh_early_enable_info info; + + if (!dn || !PCI_DN(dn)) + return; + phb = PCI_DN(dn)->phb; + if (NULL == phb || 0 == phb->buid) { + printk(KERN_WARNING "EEH: Expected buid but found none for %s\n", + dn->full_name); + dump_stack(); + return; + } + + info.buid_hi = BUID_HI(phb->buid); + info.buid_lo = BUID_LO(phb->buid); + early_enable_eeh(dn, &info); +} +EXPORT_SYMBOL_GPL(eeh_add_device_early); + +/** + * eeh_add_device_late - perform EEH initialization for the indicated pci device + * @dev: pci device for which to set up EEH + * + * This routine must be used to complete EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + */ +void eeh_add_device_late(struct pci_dev *dev) +{ + struct device_node *dn; + struct pci_dn *pdn; + + if (!dev || !eeh_subsystem_enabled) + return; + +#ifdef DEBUG + printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev)); +#endif + + pci_dev_get (dev); + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + pdn->pcidev = dev; + + pci_addr_cache_insert_device (dev); + eeh_save_bars(dev, pdn); +} +EXPORT_SYMBOL_GPL(eeh_add_device_late); + +/** + * eeh_remove_device - undo EEH setup for the indicated pci device + * @dev: pci device to be removed + * + * This routine should be when a device is removed from a running + * system (e.g. by hotplug or dlpar). + */ +void eeh_remove_device(struct pci_dev *dev) +{ + struct device_node *dn; + if (!dev || !eeh_subsystem_enabled) + return; + + /* Unregister the device with the EEH/PCI address search system */ +#ifdef DEBUG + printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); +#endif + pci_addr_cache_remove_device(dev); + + dn = pci_device_to_OF_node(dev); + PCI_DN(dn)->pcidev = NULL; + pci_dev_put (dev); +} +EXPORT_SYMBOL_GPL(eeh_remove_device); + +static int proc_eeh_show(struct seq_file *m, void *v) +{ + unsigned int cpu; + unsigned long ffs = 0, positives = 0, failures = 0; + unsigned long resets = 0; + unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0; + + for_each_cpu(cpu) { + ffs += per_cpu(total_mmio_ffs, cpu); + positives += per_cpu(false_positives, cpu); + failures += per_cpu(ignored_failures, cpu); + resets += per_cpu(slot_resets, cpu); + no_dev += per_cpu(no_device, cpu); + no_dn += per_cpu(no_dn, cpu); + no_cfg += per_cpu(no_cfg_addr, cpu); + no_check += per_cpu(ignored_check, cpu); + } + + if (0 == eeh_subsystem_enabled) { + seq_printf(m, "EEH Subsystem is globally disabled\n"); + seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); + } else { + seq_printf(m, "EEH Subsystem is enabled\n"); + seq_printf(m, + "no device=%ld\n" + "no device node=%ld\n" + "no config address=%ld\n" + "check not wanted=%ld\n" + "eeh_total_mmio_ffs=%ld\n" + "eeh_false_positives=%ld\n" + "eeh_ignored_failures=%ld\n" + "eeh_slot_resets=%ld\n", + no_dev, no_dn, no_cfg, no_check, + ffs, positives, failures, resets); + } + + return 0; +} + +static int proc_eeh_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_eeh_show, NULL); +} + +static struct file_operations proc_eeh_operations = { + .open = proc_eeh_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init eeh_init_proc(void) +{ + struct proc_dir_entry *e; + + if (platform_is_pseries()) { + e = create_proc_entry("ppc64/eeh", 0, NULL); + if (e) + e->proc_fops = &proc_eeh_operations; + } + + return 0; +} +__initcall(eeh_init_proc); diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c new file mode 100644 index 000000000000..92497333c2b6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -0,0 +1,155 @@ +/* + * eeh_event.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2005 Linas Vepstas <linas@linas.org> + */ + +#include <linux/list.h> +#include <linux/pci.h> +#include <asm/eeh_event.h> + +/** Overview: + * EEH error states may be detected within exception handlers; + * however, the recovery processing needs to occur asynchronously + * in a normal kernel context and not an interrupt context. + * This pair of routines creates an event and queues it onto a + * work-queue, where a worker thread can drive recovery. + */ + +/* EEH event workqueue setup. */ +static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; +LIST_HEAD(eeh_eventlist); +static void eeh_thread_launcher(void *); +DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); + +/** + * eeh_panic - call panic() for an eeh event that cannot be handled. + * The philosophy of this routine is that it is better to panic and + * halt the OS than it is to risk possible data corruption by + * oblivious device drivers that don't know better. + * + * @dev pci device that had an eeh event + * @reset_state current reset state of the device slot + */ +static void eeh_panic(struct pci_dev *dev, int reset_state) +{ + /* + * Since the panic_on_oops sysctl is used to halt the system + * in light of potential corruption, we can use it here. + */ + if (panic_on_oops) { + panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, + pci_name(dev)); + } + else { + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", + reset_state, pci_name(dev)); + } +} + +/** + * eeh_event_handler - dispatch EEH events. The detection of a frozen + * slot can occur inside an interrupt, where it can be hard to do + * anything about it. The goal of this routine is to pull these + * detection events out of the context of the interrupt handler, and + * re-dispatch them for processing at a later time in a normal context. + * + * @dummy - unused + */ +static int eeh_event_handler(void * dummy) +{ + unsigned long flags; + struct eeh_event *event; + + daemonize ("eehd"); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (event == NULL) + break; + + printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", + pci_name(event->dev)); + + eeh_panic (event->dev, event->state); + + kfree(event); + } + + return 0; +} + +/** + * eeh_thread_launcher + * + * @dummy - unused + */ +static void eeh_thread_launcher(void *dummy) +{ + if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) + printk(KERN_ERR "Failed to start EEH daemon\n"); +} + +/** + * eeh_send_failure_event - generate a PCI error event + * @dev pci device + * + * This routine can be called within an interrupt context; + * the actual event will be delivered in a normal context + * (from a workqueue). + */ +int eeh_send_failure_event (struct device_node *dn, + struct pci_dev *dev, + int state, + int time_unavail) +{ + unsigned long flags; + struct eeh_event *event; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (event == NULL) { + printk (KERN_ERR "EEH: out of memory, event not handled\n"); + return 1; + } + + if (dev) + pci_dev_get(dev); + + event->dn = dn; + event->dev = dev; + event->state = state; + event->time_unavail = time_unavail; + + /* We may or may not be called in an interrupt context */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + + schedule_work(&eeh_event_wq); + + return 0; +} + +/********************** END OF FILE ******************************/ diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c new file mode 100644 index 000000000000..138e128a3886 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -0,0 +1,74 @@ +/* + * hvconsole.c + * Copyright (C) 2004 Hollis Blanchard, IBM Corporation + * Copyright (C) 2004 IBM Corporation + * + * Additional Author(s): + * Ryan S. Arnold <rsa@us.ibm.com> + * + * LPAR console support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/hvcall.h> +#include <asm/hvconsole.h> + +/** + * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper + * @vtermno: The vtermno or unit_address of the adapter from which to fetch the + * data. + * @buf: The character buffer into which to put the character data fetched from + * firmware. + * @count: not used? + */ +int hvc_get_chars(uint32_t vtermno, char *buf, int count) +{ + unsigned long got; + + if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) + return got; + return 0; +} + +EXPORT_SYMBOL(hvc_get_chars); + + +/** + * hvc_put_chars: send characters to firmware for denoted vterm adapter + * @vtermno: The vtermno or unit_address of the adapter from which the data + * originated. + * @buf: The character buffer that contains the character data to send to + * firmware. + * @count: Send this number of characters. + */ +int hvc_put_chars(uint32_t vtermno, const char *buf, int count) +{ + unsigned long *lbuf = (unsigned long *) buf; + long ret; + + ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0], + lbuf[1]); + if (ret == H_Success) + return count; + if (ret == H_Busy) + return 0; + return -EIO; +} + +EXPORT_SYMBOL(hvc_put_chars); diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c new file mode 100644 index 000000000000..4d584172055a --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -0,0 +1,251 @@ +/* + * hvcserver.c + * Copyright (C) 2004 Ryan S Arnold, IBM Corporation + * + * PPC64 virtual I/O console server support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <asm/hvcall.h> +#include <asm/hvcserver.h> +#include <asm/io.h> + +#define HVCS_ARCH_VERSION "1.0.0" + +MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>"); +MODULE_DESCRIPTION("IBM hvcs ppc64 API"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(HVCS_ARCH_VERSION); + +/* + * Convert arch specific return codes into relevant errnos. The hvcs + * functions aren't performance sensitive, so this conversion isn't an + * issue. + */ +int hvcs_convert(long to_convert) +{ + switch (to_convert) { + case H_Success: + return 0; + case H_Parameter: + return -EINVAL; + case H_Hardware: + return -EIO; + case H_Busy: + case H_LongBusyOrder1msec: + case H_LongBusyOrder10msec: + case H_LongBusyOrder100msec: + case H_LongBusyOrder1sec: + case H_LongBusyOrder10sec: + case H_LongBusyOrder100sec: + return -EBUSY; + case H_Function: /* fall through */ + default: + return -EPERM; + } +} + +/** + * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info + * @head: list_head pointer for an allocated list of partner info structs to + * free. + * + * This function is used to free the partner info list that was returned by + * calling hvcs_get_partner_info(). + */ +int hvcs_free_partner_info(struct list_head *head) +{ + struct hvcs_partner_info *pi; + struct list_head *element; + + if (!head) + return -EINVAL; + + while (!list_empty(head)) { + element = head->next; + pi = list_entry(element, struct hvcs_partner_info, node); + list_del(element); + kfree(pi); + } + + return 0; +} +EXPORT_SYMBOL(hvcs_free_partner_info); + +/* Helper function for hvcs_get_partner_info */ +int hvcs_next_partner(uint32_t unit_address, + unsigned long last_p_partition_ID, + unsigned long last_p_unit_address, unsigned long *pi_buff) + +{ + long retval; + retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address, + last_p_partition_ID, + last_p_unit_address, virt_to_phys(pi_buff)); + return hvcs_convert(retval); +} + +/** + * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter + * @unit_address: The unit_address of the vty-server adapter for which this + * function is fetching partner info. + * @head: An initialized list_head pointer to an empty list to use to return the + * list of partner info fetched from the hypervisor to the caller. + * @pi_buff: A page sized buffer pre-allocated prior to calling this function + * that is to be used to be used by firmware as an iterator to keep track + * of the partner info retrieval. + * + * This function returns non-zero on success, or if there is no partner info. + * + * The pi_buff is pre-allocated prior to calling this function because this + * function may be called with a spin_lock held and kmalloc of a page is not + * recommended as GFP_ATOMIC. + * + * The first long of this buffer is used to store a partner unit address. The + * second long is used to store a partner partition ID and starting at + * pi_buff[2] is the 79 character Converged Location Code (diff size than the + * unsigned longs, hence the casting mumbo jumbo you see later). + * + * Invocation of this function should always be followed by an invocation of + * hvcs_free_partner_info() using a pointer to the SAME list head instance + * that was passed as a parameter to this function. + */ +int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, + unsigned long *pi_buff) +{ + /* + * Dealt with as longs because of the hcall interface even though the + * values are uint32_t. + */ + unsigned long last_p_partition_ID; + unsigned long last_p_unit_address; + struct hvcs_partner_info *next_partner_info = NULL; + int more = 1; + int retval; + + memset(pi_buff, 0x00, PAGE_SIZE); + /* invalid parameters */ + if (!head || !pi_buff) + return -EINVAL; + + last_p_partition_ID = last_p_unit_address = ~0UL; + INIT_LIST_HEAD(head); + + do { + retval = hvcs_next_partner(unit_address, last_p_partition_ID, + last_p_unit_address, pi_buff); + if (retval) { + /* + * Don't indicate that we've failed if we have + * any list elements. + */ + if (!list_empty(head)) + return 0; + return retval; + } + + last_p_partition_ID = pi_buff[0]; + last_p_unit_address = pi_buff[1]; + + /* This indicates that there are no further partners */ + if (last_p_partition_ID == ~0UL + && last_p_unit_address == ~0UL) + break; + + /* This is a very small struct and will be freed soon in + * hvcs_free_partner_info(). */ + next_partner_info = kmalloc(sizeof(struct hvcs_partner_info), + GFP_ATOMIC); + + if (!next_partner_info) { + printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to" + " allocate partner info struct.\n"); + hvcs_free_partner_info(head); + return -ENOMEM; + } + + next_partner_info->unit_address + = (unsigned int)last_p_unit_address; + next_partner_info->partition_ID + = (unsigned int)last_p_partition_ID; + + /* copy the Null-term char too */ + strncpy(&next_partner_info->location_code[0], + (char *)&pi_buff[2], + strlen((char *)&pi_buff[2]) + 1); + + list_add_tail(&(next_partner_info->node), head); + next_partner_info = NULL; + + } while (more); + + return 0; +} +EXPORT_SYMBOL(hvcs_get_partner_info); + +/** + * hvcs_register_connection - establish a connection between this vty-server and + * a vty. + * @unit_address: The unit address of the vty-server adapter that is to be + * establish a connection. + * @p_partition_ID: The partition ID of the vty adapter that is to be connected. + * @p_unit_address: The unit address of the vty adapter to which the vty-server + * is to be connected. + * + * If this function is called once and -EINVAL is returned it may + * indicate that the partner info needs to be refreshed for the + * target unit address at which point the caller must invoke + * hvcs_get_partner_info() and then call this function again. If, + * for a second time, -EINVAL is returned then it indicates that + * there is probably already a partner connection registered to a + * different vty-server adapter. It is also possible that a second + * -EINVAL may indicate that one of the parms is not valid, for + * instance if the link was removed between the vty-server adapter + * and the vty adapter that you are trying to open. Don't shoot the + * messenger. Firmware implemented it this way. + */ +int hvcs_register_connection( uint32_t unit_address, + uint32_t p_partition_ID, uint32_t p_unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address, + p_partition_ID, p_unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_register_connection); + +/** + * hvcs_free_connection - free the connection between a vty-server and vty + * @unit_address: The unit address of the vty-server that is to have its + * connection severed. + * + * This function is used to free the partner connection between a vty-server + * adapter and a vty adapter. + * + * If -EBUSY is returned continue to call this function until 0 is returned. + */ +int hvcs_free_connection(uint32_t unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_FREE_VTERM, unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_free_connection); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fcc50bfd43fd..2043659ea7b1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -5,7 +5,7 @@ * * Rewrite, cleanup: * - * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. * @@ -42,7 +42,6 @@ #include <asm/machdep.h> #include <asm/abs_addr.h> #include <asm/pSeries_reconfig.h> -#include <asm/systemcfg.h> #include <asm/firmware.h> #include <asm/tce.h> #include <asm/ppc-pci.h> @@ -110,6 +109,9 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, u64 rc; union tce_entry tce; + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + tce.te_word = 0; tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; tce.te_rdwr = 1; @@ -144,10 +146,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, union tce_entry tce, *tcep; long l, limit; - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - - if (npages == 1) + if (TCE_PAGE_FACTOR == 0 && npages == 1) return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction); @@ -165,6 +164,9 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, __get_cpu_var(tce_page) = tcep; } + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + tce.te_word = 0; tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; tce.te_rdwr = 1; @@ -582,7 +584,7 @@ void iommu_init_early_pSeries(void) return; } - if (systemcfg->platform & PLATFORM_LPAR) { + if (platform_is_lpar()) { if (firmware_has_feature(FW_FEATURE_MULTITCE)) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index a50e5f3f396d..cf1bc11b3346 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -298,18 +298,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, if (!(vflags & HPTE_V_BOLTED)) DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); -#if 1 - { - int i; - for (i=0;i<8;i++) { - unsigned long w0, w1; - plpar_pte_read(0, hpte_group, &w0, &w1); - BUG_ON (HPTE_V_COMPARE(hpte_v, w0) - && (w0 & HPTE_V_VALID)); - } - } -#endif - /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ /* Zero page = 0 */ diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index c198656a3bb5..999a9620b5ce 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -107,7 +107,6 @@ static void __init pSeries_request_regions(void) void __init pSeries_final_fixup(void) { - phbs_remap_io(); pSeries_request_regions(); pci_addr_cache_build(); @@ -123,7 +122,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev) int i; unsigned int reg; - if (!(systemcfg->platform & PLATFORM_PSERIES)) + if (!platform_is_pseries()) return; printk("Using INTC for W82c105 IDE controller.\n"); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index d7d400339458..d8864164dbe8 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -408,7 +408,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!(systemcfg->platform & PLATFORM_PSERIES)) + if (!platform_is_pseries()) return 0; ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index e26b0420b6dd..a6f628d4c9dc 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -27,7 +27,6 @@ #include <asm/prom.h> #include <asm/nvram.h> #include <asm/atomic.h> -#include <asm/systemcfg.h> #if 0 #define DEBUG(A...) printk(KERN_ERR A) @@ -482,10 +481,12 @@ static int __init rtas_init(void) { struct proc_dir_entry *entry; - /* No RTAS, only warn if we are on a pSeries box */ + if (!platform_is_pseries()) + return 0; + + /* No RTAS */ if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) { - if (systemcfg->platform & PLATFORM_PSERIES) - printk(KERN_INFO "rtasd: no event-scan on system\n"); + printk(KERN_INFO "rtasd: no event-scan on system\n"); return 1; } diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c new file mode 100644 index 000000000000..2edc947f7c44 --- /dev/null +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -0,0 +1,235 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> + * + * When ppc64 hardware fails the service processor dumps internal state + * of the system. After a reboot the operating system can access a dump + * of this data using this driver. A dump exists if the device-tree + * /chosen/ibm,scan-log-data property exists. + * + * This driver exports /proc/ppc64/scan-log-dump which can be read. + * The driver supports only sequential reads. + * + * The driver looks at a write to the driver for the single word "reset". + * If given, the driver will reset the scanlog so the platform can free it. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <asm/uaccess.h> +#include <asm/rtas.h> +#include <asm/prom.h> + +#define MODULE_VERS "1.0" +#define MODULE_NAME "scanlog" + +/* Status returns from ibm,scan-log-dump */ +#define SCANLOG_COMPLETE 0 +#define SCANLOG_HWERROR -1 +#define SCANLOG_CONTINUE 1 + +#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) + +static int scanlog_debug; +static unsigned int ibm_scan_log_dump; /* RTAS token */ +static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ + +static ssize_t scanlog_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + struct proc_dir_entry *dp; + unsigned int *data; + int status; + unsigned long len, off; + unsigned int wait_time; + + dp = PDE(inode); + data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: read failed no data\n"); + return -EIO; + } + + if (count > RTAS_DATA_BUF_SIZE) + count = RTAS_DATA_BUF_SIZE; + + if (count < 1024) { + /* This is the min supported by this RTAS call. Rather + * than do all the buffering we insist the user code handle + * larger reads. As long as cp works... :) + */ + printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); + return -EINVAL; + } + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + for (;;) { + wait_time = 500; /* default wait if no data */ + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, + (u32) __pa(rtas_data_buf), (u32) count); + memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", + status, data[0], data[1], data[2]); + switch (status) { + case SCANLOG_COMPLETE: + DEBUG("hit eof\n"); + return 0; + case SCANLOG_HWERROR: + DEBUG("hardware error reading scan log data\n"); + return -EIO; + case SCANLOG_CONTINUE: + /* We may or may not have data yet */ + len = data[1]; + off = data[2]; + if (len > 0) { + if (copy_to_user(buf, ((char *)data)+off, len)) + return -EFAULT; + return len; + } + /* Break to sleep default time */ + break; + default: + if (status > 9900 && status <= 9905) { + wait_time = rtas_extended_busy_delay_time(status); + } else { + printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); + return -EIO; + } + } + /* Apparently no data yet. Wait and try again. */ + msleep_interruptible(wait_time); + } + /*NOTREACHED*/ +} + +static ssize_t scanlog_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + char stkbuf[20]; + int status; + + if (count > 19) count = 19; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + stkbuf[count] = 0; + + if (buf) { + if (strncmp(stkbuf, "reset", 5) == 0) { + DEBUG("reset scanlog\n"); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); + DEBUG("rtas returns %d\n", status); + } else if (strncmp(stkbuf, "debugon", 7) == 0) { + printk(KERN_ERR "scanlog: debug on\n"); + scanlog_debug = 1; + } else if (strncmp(stkbuf, "debugoff", 8) == 0) { + printk(KERN_ERR "scanlog: debug off\n"); + scanlog_debug = 0; + } + } + return count; +} + +static int scanlog_open(struct inode * inode, struct file * file) +{ + struct proc_dir_entry *dp = PDE(inode); + unsigned int *data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: open failed no data\n"); + return -EIO; + } + + if (data[0] != 0) { + /* This imperfect test stops a second copy of the + * data (or a reset while data is being copied) + */ + return -EBUSY; + } + + data[0] = 0; /* re-init so we restart the scan */ + + return 0; +} + +static int scanlog_release(struct inode * inode, struct file * file) +{ + struct proc_dir_entry *dp = PDE(inode); + unsigned int *data = (unsigned int *)dp->data; + + if (!data) { + printk(KERN_ERR "scanlog: release failed no data\n"); + return -EIO; + } + data[0] = 0; + + return 0; +} + +struct file_operations scanlog_fops = { + .owner = THIS_MODULE, + .read = scanlog_read, + .write = scanlog_write, + .open = scanlog_open, + .release = scanlog_release, +}; + +int __init scanlog_init(void) +{ + struct proc_dir_entry *ent; + + ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); + if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "scan-log-dump not implemented on this system\n"); + return -EIO; + } + + ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL); + if (ent) { + ent->proc_fops = &scanlog_fops; + /* Ideally we could allocate a buffer < 4G */ + ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!ent->data) { + printk(KERN_ERR "Failed to allocate a buffer\n"); + remove_proc_entry("scan-log-dump", ent->parent); + return -ENOMEM; + } + ((unsigned int *)ent->data)[0] = 0; + } else { + printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n"); + return -EIO; + } + proc_ppc64_scan_log_dump = ent; + + return 0; +} + +void __exit scanlog_cleanup(void) +{ + if (proc_ppc64_scan_log_dump) { + kfree(proc_ppc64_scan_log_dump->data); + remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent); + } +} + +module_init(scanlog_init); +module_exit(scanlog_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a093a0d4dd69..4a465f067ede 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -200,14 +200,12 @@ static void __init pSeries_setup_arch(void) if (ppc64_interrupt_controller == IC_OPEN_PIC) { ppc_md.init_IRQ = pSeries_init_mpic; ppc_md.get_irq = mpic_get_irq; - ppc_md.cpu_irq_down = mpic_teardown_this_cpu; /* Allocate the mpic now, so that find_and_init_phbs() can * fill the ISUs */ pSeries_setup_mpic(); } else { ppc_md.init_IRQ = xics_init_IRQ; ppc_md.get_irq = xics_get_irq; - ppc_md.cpu_irq_down = xics_teardown_cpu; } #ifdef CONFIG_SMP @@ -249,7 +247,7 @@ static void __init pSeries_setup_arch(void) ppc_md.idle_loop = default_idle; } - if (systemcfg->platform & PLATFORM_LPAR) + if (platform_is_lpar()) ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; else ppc_md.enable_pmcs = power4_enable_pmcs; @@ -306,9 +304,7 @@ static void __init fw_feature_init(void) } of_node_put(dn); - no_rtas: - printk(KERN_INFO "firmware_features = 0x%lx\n", - ppc64_firmware_features); +no_rtas: DBG(" <- fw_feature_init()\n"); } @@ -378,7 +374,7 @@ static void __init pSeries_init_early(void) fw_feature_init(); - if (systemcfg->platform & PLATFORM_LPAR) + if (platform_is_lpar()) hpte_init_lpar(); else { hpte_init_native(); @@ -388,7 +384,7 @@ static void __init pSeries_init_early(void) generic_find_legacy_serial_ports(&physport, &default_speed); - if (systemcfg->platform & PLATFORM_LPAR) + if (platform_is_lpar()) find_udbg_vterm(); else if (physport) { /* Map the uart for udbg. */ @@ -508,7 +504,7 @@ static void pseries_dedicated_idle(void) lpaca->lppaca.idle = 1; if (!need_resched()) { - start_snooze = __get_tb() + + start_snooze = get_tb() + *smt_snooze_delay * tb_ticks_per_usec; while (!need_resched() && !cpu_is_offline(cpu)) { @@ -522,7 +518,7 @@ static void pseries_dedicated_idle(void) HMT_very_low(); if (*smt_snooze_delay != 0 && - __get_tb() > start_snooze) { + get_tb() > start_snooze) { HMT_medium(); dedicated_idle_sleep(cpu); } @@ -592,11 +588,32 @@ static void pseries_shared_idle(void) static int pSeries_pci_probe_mode(struct pci_bus *bus) { - if (systemcfg->platform & PLATFORM_LPAR) + if (platform_is_lpar()) return PCI_PROBE_DEVTREE; return PCI_PROBE_NORMAL; } +#ifdef CONFIG_KEXEC +static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) +{ + /* Don't risk a hypervisor call if we're crashing */ + if (!crash_shutdown) { + unsigned long vpa = __pa(&get_paca()->lppaca); + + if (unregister_vpa(hard_smp_processor_id(), vpa)) { + printk("VPA deregistration of cpu %u (hw_cpu_id %d) " + "failed\n", smp_processor_id(), + hard_smp_processor_id()); + } + } + + if (ppc64_interrupt_controller == IC_OPEN_PIC) + mpic_teardown_this_cpu(secondary); + else + xics_teardown_cpu(secondary); +} +#endif + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, @@ -619,4 +636,7 @@ struct machdep_calls __initdata pSeries_md = { .check_legacy_ioport = pSeries_check_legacy_ioport, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, +#ifdef CONFIG_KEXEC + .kexec_cpu_down = pseries_kexec_cpu_down, +#endif }; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 7a243e8ccd7e..25181c594d73 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -46,10 +46,12 @@ #include <asm/rtas.h> #include <asm/pSeries_reconfig.h> #include <asm/mpic.h> +#include <asm/vdso_datapage.h> #include "plpar_wrappers.h" #ifdef DEBUG +#include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) @@ -96,7 +98,7 @@ int pSeries_cpu_disable(void) int cpu = smp_processor_id(); cpu_clear(cpu, cpu_online_map); - systemcfg->processorCount--; + vdso_data->processorCount--; /*fix boot_cpuid here*/ if (cpu == boot_cpuid) @@ -441,7 +443,7 @@ void __init smp_init_pSeries(void) smp_ops->cpu_die = pSeries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + if (platform_is_lpar()) pSeries_reconfig_notifier_register(&pSeries_smp_nb); #endif diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index c72c86f05cb6..0377decc0719 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -48,11 +48,6 @@ static struct hw_interrupt_type xics_pic = { .set_affinity = xics_set_affinity }; -static struct hw_interrupt_type xics_8259_pic = { - .typename = " XICS/8259", - .ack = xics_mask_and_ack_irq, -}; - /* This is used to map real irq numbers to virtual */ static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); @@ -367,12 +362,7 @@ int xics_get_irq(struct pt_regs *regs) /* for sanity, this had better be < NR_IRQS - 16 */ if (vec == xics_irq_8259_cascade_real) { irq = i8259_irq(regs); - if (irq == -1) { - /* Spurious cascaded interrupt. Still must ack xics */ - xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); - - irq = -1; - } + xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); } else if (vec == XICS_IRQ_SPURIOUS) { irq = -1; } else { @@ -542,10 +532,13 @@ nextnode: xics_irq_8259_cascade_real = *ireg; xics_irq_8259_cascade = virt_irq_create_mapping(xics_irq_8259_cascade_real); + i8259_init(0, 0); of_node_put(np); } - if (systemcfg->platform == PLATFORM_PSERIES) { + if (platform_is_lpar()) + ops = &pSeriesLP_ops; + else { #ifdef CONFIG_SMP for_each_cpu(i) { int hard_id; @@ -561,15 +554,9 @@ nextnode: #else xics_per_cpu[0] = ioremap(intr_base, intr_size); #endif /* CONFIG_SMP */ - } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) { - ops = &pSeriesLP_ops; } - xics_8259_pic.enable = i8259_pic.enable; - xics_8259_pic.disable = i8259_pic.disable; - for (i = 0; i < 16; ++i) - get_irq_desc(i)->handler = &xics_8259_pic; - for (; i < NR_IRQS; ++i) + for (i = irq_offset_value(); i < NR_IRQS; ++i) get_irq_desc(i)->handler = &xics_pic; xics_setup_cpu(); @@ -589,7 +576,6 @@ static int __init xics_setup_i8259(void) no_action, 0, "8259 cascade", NULL)) printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " "cascade\n"); - i8259_init(0, 0); } return 0; } diff --git a/arch/powerpc/sysdev/dart.h b/arch/powerpc/sysdev/dart.h index ea8f0d9eed8a..33ed9ed7fc1e 100644 --- a/arch/powerpc/sysdev/dart.h +++ b/arch/powerpc/sysdev/dart.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 105f05341a41..58d1cc2023c8 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -361,7 +361,8 @@ static void mpic_enable_irq(unsigned int irq) DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, - mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK); + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & + ~MPIC_VECPRI_MASK); /* make sure mask gets to controller before we return to user */ do { @@ -381,7 +382,8 @@ static void mpic_disable_irq(unsigned int irq) DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, - mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK); + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | + MPIC_VECPRI_MASK); /* make sure mask gets to controller before we return to user */ do { @@ -735,12 +737,13 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) spin_lock_irqsave(&mpic_lock, flags); if (is_ipi) { - reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK; + reg = mpic_ipi_read(irq - mpic->ipi_offset) & + ~MPIC_VECPRI_PRIORITY_MASK; mpic_ipi_write(irq - mpic->ipi_offset, reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } else { - reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI) - & MPIC_VECPRI_PRIORITY_MASK; + reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI) + & ~MPIC_VECPRI_PRIORITY_MASK; mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } diff --git a/arch/powerpc/sysdev/u3_iommu.c b/arch/powerpc/sysdev/u3_iommu.c index 543d65909812..5c1a26a6d00c 100644 --- a/arch/powerpc/sysdev/u3_iommu.c +++ b/arch/powerpc/sysdev/u3_iommu.c @@ -1,11 +1,11 @@ /* * arch/powerpc/sysdev/u3_iommu.c * - * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * Based on pSeries_iommu.c: * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation - * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation + * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * Dynamic DMA mapping support, Apple U3 & IBM CPC925 "DART" iommu. * @@ -226,7 +226,7 @@ static void iommu_table_u3_setup(void) iommu_table_u3.it_busno = 0; iommu_table_u3.it_offset = 0; /* it_size is in number of entries */ - iommu_table_u3.it_size = dart_tablesize / sizeof(u32); + iommu_table_u3.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR; /* Initialize the common IOMMU code */ iommu_table_u3.it_base = (unsigned long)dart_vbase; diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 79a784f0e7a9..b20312e5ed27 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_8xx) += start_8xx.o obj-$(CONFIG_6xx) += start_32.o obj-$(CONFIG_4xx) += start_32.o obj-$(CONFIG_PPC64) += start_64.o -obj-y += xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o +obj-y += xmon.o ppc-dis.o ppc-opc.o setjmp.o nonstdio.o diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c new file mode 100644 index 000000000000..78765833f4c0 --- /dev/null +++ b/arch/powerpc/xmon/nonstdio.c @@ -0,0 +1,134 @@ +/* + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <asm/time.h> +#include "nonstdio.h" + +int xmon_putchar(int c) +{ + char ch = c; + + if (c == '\n') + xmon_putchar('\r'); + return xmon_write(&ch, 1) == 1? c: -1; +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int xmon_expect(const char *str, unsigned long timeout) +{ + int c; + unsigned long t0; + + /* assume 25MHz default timebase if tb_ticks_per_sec not set yet */ + timeout *= tb_ticks_per_sec? tb_ticks_per_sec: 25000000; + t0 = get_tbl(); + do { + lineptr = line; + for (;;) { + c = xmon_read_poll(); + if (c == -1) { + if (get_tbl() - t0 > timeout) + return 0; + continue; + } + if (c == '\n') + break; + if (c != '\r' && lineptr < &line[sizeof(line) - 1]) + *lineptr++ = c; + } + *lineptr = 0; + } while (strstr(line, str) == NULL); + return 1; +} + +int xmon_getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = xmon_readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + xmon_putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + xmon_putchar('\b'); + xmon_putchar(' '); + xmon_putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + xmon_putchar('\b'); + xmon_putchar(' '); + xmon_putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + xmon_putchar('\a'); + else { + xmon_putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + +char *xmon_gets(char *str, int nb) +{ + char *p; + int c; + + for (p = str; p < str + nb - 1; ) { + c = xmon_getchar(); + if (c == -1) { + if (p == str) + return NULL; + break; + } + *p++ = c; + if (c == '\n') + break; + } + *p = 0; + return str; +} + +void xmon_printf(const char *format, ...) +{ + va_list args; + int n; + static char xmon_outbuf[1024]; + + va_start(args, format); + n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args); + va_end(args); + xmon_write(xmon_outbuf, n); +} diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h index 84211a21c6f4..47cebbd2b1b1 100644 --- a/arch/powerpc/xmon/nonstdio.h +++ b/arch/powerpc/xmon/nonstdio.h @@ -1,22 +1,14 @@ -typedef int FILE; -extern FILE *xmon_stdin, *xmon_stdout; #define EOF (-1) -#define stdin xmon_stdin -#define stdout xmon_stdout + #define printf xmon_printf -#define fprintf xmon_fprintf -#define fputs xmon_fputs -#define fgets xmon_fgets #define putchar xmon_putchar -#define getchar xmon_getchar -#define putc xmon_putc -#define getc xmon_getc -#define fopen(n, m) NULL -#define fflush(f) do {} while (0) -#define fclose(f) do {} while (0) -extern char *fgets(char *, int, void *); -extern void xmon_printf(const char *, ...); -extern void xmon_fprintf(void *, const char *, ...); -extern void xmon_sprintf(char *, const char *, ...); -#define perror(s) printf("%s: no files!\n", (s)) +extern int xmon_putchar(int c); +extern int xmon_getchar(void); +extern char *xmon_gets(char *, int); +extern void xmon_printf(const char *, ...); +extern void xmon_map_scc(void); +extern int xmon_expect(const char *str, unsigned long timeout); +extern int xmon_write(void *ptr, int nb); +extern int xmon_readchar(void); +extern int xmon_read_poll(void); diff --git a/arch/powerpc/xmon/setjmp.S b/arch/powerpc/xmon/setjmp.S index f8e40dfd2bff..96a91f10e2ec 100644 --- a/arch/powerpc/xmon/setjmp.S +++ b/arch/powerpc/xmon/setjmp.S @@ -14,61 +14,61 @@ _GLOBAL(xmon_setjmp) mflr r0 - STL r0,0(r3) - STL r1,SZL(r3) - STL r2,2*SZL(r3) + PPC_STL r0,0(r3) + PPC_STL r1,SZL(r3) + PPC_STL r2,2*SZL(r3) mfcr r0 - STL r0,3*SZL(r3) - STL r13,4*SZL(r3) - STL r14,5*SZL(r3) - STL r15,6*SZL(r3) - STL r16,7*SZL(r3) - STL r17,8*SZL(r3) - STL r18,9*SZL(r3) - STL r19,10*SZL(r3) - STL r20,11*SZL(r3) - STL r21,12*SZL(r3) - STL r22,13*SZL(r3) - STL r23,14*SZL(r3) - STL r24,15*SZL(r3) - STL r25,16*SZL(r3) - STL r26,17*SZL(r3) - STL r27,18*SZL(r3) - STL r28,19*SZL(r3) - STL r29,20*SZL(r3) - STL r30,21*SZL(r3) - STL r31,22*SZL(r3) + PPC_STL r0,3*SZL(r3) + PPC_STL r13,4*SZL(r3) + PPC_STL r14,5*SZL(r3) + PPC_STL r15,6*SZL(r3) + PPC_STL r16,7*SZL(r3) + PPC_STL r17,8*SZL(r3) + PPC_STL r18,9*SZL(r3) + PPC_STL r19,10*SZL(r3) + PPC_STL r20,11*SZL(r3) + PPC_STL r21,12*SZL(r3) + PPC_STL r22,13*SZL(r3) + PPC_STL r23,14*SZL(r3) + PPC_STL r24,15*SZL(r3) + PPC_STL r25,16*SZL(r3) + PPC_STL r26,17*SZL(r3) + PPC_STL r27,18*SZL(r3) + PPC_STL r28,19*SZL(r3) + PPC_STL r29,20*SZL(r3) + PPC_STL r30,21*SZL(r3) + PPC_STL r31,22*SZL(r3) li r3,0 blr _GLOBAL(xmon_longjmp) - CMPI r4,0 + PPC_LCMPI r4,0 bne 1f li r4,1 -1: LDL r13,4*SZL(r3) - LDL r14,5*SZL(r3) - LDL r15,6*SZL(r3) - LDL r16,7*SZL(r3) - LDL r17,8*SZL(r3) - LDL r18,9*SZL(r3) - LDL r19,10*SZL(r3) - LDL r20,11*SZL(r3) - LDL r21,12*SZL(r3) - LDL r22,13*SZL(r3) - LDL r23,14*SZL(r3) - LDL r24,15*SZL(r3) - LDL r25,16*SZL(r3) - LDL r26,17*SZL(r3) - LDL r27,18*SZL(r3) - LDL r28,19*SZL(r3) - LDL r29,20*SZL(r3) - LDL r30,21*SZL(r3) - LDL r31,22*SZL(r3) - LDL r0,3*SZL(r3) +1: PPC_LL r13,4*SZL(r3) + PPC_LL r14,5*SZL(r3) + PPC_LL r15,6*SZL(r3) + PPC_LL r16,7*SZL(r3) + PPC_LL r17,8*SZL(r3) + PPC_LL r18,9*SZL(r3) + PPC_LL r19,10*SZL(r3) + PPC_LL r20,11*SZL(r3) + PPC_LL r21,12*SZL(r3) + PPC_LL r22,13*SZL(r3) + PPC_LL r23,14*SZL(r3) + PPC_LL r24,15*SZL(r3) + PPC_LL r25,16*SZL(r3) + PPC_LL r26,17*SZL(r3) + PPC_LL r27,18*SZL(r3) + PPC_LL r28,19*SZL(r3) + PPC_LL r29,20*SZL(r3) + PPC_LL r30,21*SZL(r3) + PPC_LL r31,22*SZL(r3) + PPC_LL r0,3*SZL(r3) mtcrf 0x38,r0 - LDL r0,0(r3) - LDL r1,SZL(r3) - LDL r2,2*SZL(r3) + PPC_LL r0,0(r3) + PPC_LL r1,SZL(r3) + PPC_LL r2,2*SZL(r3) mtlr r0 mr r3,r4 blr @@ -84,52 +84,52 @@ _GLOBAL(xmon_longjmp) * different ABIs, though). */ _GLOBAL(xmon_save_regs) - STL r0,0*SZL(r3) - STL r2,2*SZL(r3) - STL r3,3*SZL(r3) - STL r4,4*SZL(r3) - STL r5,5*SZL(r3) - STL r6,6*SZL(r3) - STL r7,7*SZL(r3) - STL r8,8*SZL(r3) - STL r9,9*SZL(r3) - STL r10,10*SZL(r3) - STL r11,11*SZL(r3) - STL r12,12*SZL(r3) - STL r13,13*SZL(r3) - STL r14,14*SZL(r3) - STL r15,15*SZL(r3) - STL r16,16*SZL(r3) - STL r17,17*SZL(r3) - STL r18,18*SZL(r3) - STL r19,19*SZL(r3) - STL r20,20*SZL(r3) - STL r21,21*SZL(r3) - STL r22,22*SZL(r3) - STL r23,23*SZL(r3) - STL r24,24*SZL(r3) - STL r25,25*SZL(r3) - STL r26,26*SZL(r3) - STL r27,27*SZL(r3) - STL r28,28*SZL(r3) - STL r29,29*SZL(r3) - STL r30,30*SZL(r3) - STL r31,31*SZL(r3) + PPC_STL r0,0*SZL(r3) + PPC_STL r2,2*SZL(r3) + PPC_STL r3,3*SZL(r3) + PPC_STL r4,4*SZL(r3) + PPC_STL r5,5*SZL(r3) + PPC_STL r6,6*SZL(r3) + PPC_STL r7,7*SZL(r3) + PPC_STL r8,8*SZL(r3) + PPC_STL r9,9*SZL(r3) + PPC_STL r10,10*SZL(r3) + PPC_STL r11,11*SZL(r3) + PPC_STL r12,12*SZL(r3) + PPC_STL r13,13*SZL(r3) + PPC_STL r14,14*SZL(r3) + PPC_STL r15,15*SZL(r3) + PPC_STL r16,16*SZL(r3) + PPC_STL r17,17*SZL(r3) + PPC_STL r18,18*SZL(r3) + PPC_STL r19,19*SZL(r3) + PPC_STL r20,20*SZL(r3) + PPC_STL r21,21*SZL(r3) + PPC_STL r22,22*SZL(r3) + PPC_STL r23,23*SZL(r3) + PPC_STL r24,24*SZL(r3) + PPC_STL r25,25*SZL(r3) + PPC_STL r26,26*SZL(r3) + PPC_STL r27,27*SZL(r3) + PPC_STL r28,28*SZL(r3) + PPC_STL r29,29*SZL(r3) + PPC_STL r30,30*SZL(r3) + PPC_STL r31,31*SZL(r3) /* go up one stack frame for SP */ - LDL r4,0(r1) - STL r4,1*SZL(r3) + PPC_LL r4,0(r1) + PPC_STL r4,1*SZL(r3) /* get caller's LR */ - LDL r0,LRSAVE(r4) - STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) - STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + PPC_LL r0,LRSAVE(r4) + PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) mfmsr r0 - STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) mfctr r0 - STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) mfxer r0 - STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) mfcr r0 - STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) li r0,0 - STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) blr diff --git a/arch/powerpc/xmon/start_32.c b/arch/powerpc/xmon/start_32.c index 69b658c0f760..c2464df4217e 100644 --- a/arch/powerpc/xmon/start_32.c +++ b/arch/powerpc/xmon/start_32.c @@ -11,7 +11,6 @@ #include <linux/cuda.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/sysrq.h> #include <linux/bitops.h> #include <asm/xmon.h> #include <asm/prom.h> @@ -22,10 +21,11 @@ #include <asm/processor.h> #include <asm/delay.h> #include <asm/btext.h> +#include <asm/time.h> +#include "nonstdio.h" static volatile unsigned char __iomem *sccc, *sccd; unsigned int TXRDY, RXRDY, DLAB; -static int xmon_expect(const char *str, unsigned int timeout); static int use_serial; static int use_screen; @@ -33,16 +33,6 @@ static int via_modem; static int xmon_use_sccb; static struct device_node *channel_node; -#define TB_SPEED 25000000 - -static inline unsigned int readtb(void) -{ - unsigned int ret; - - asm volatile("mftb %0" : "=r" (ret) :); - return ret; -} - void buf_access(void) { if (DLAB) @@ -91,23 +81,7 @@ static unsigned long chrp_find_phys_io_base(void) } #endif /* CONFIG_PPC_CHRP */ -#ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_xmon(int key, struct pt_regs *regs, - struct tty_struct *tty) -{ - xmon(regs); -} - -static struct sysrq_key_op sysrq_xmon_op = -{ - .handler = sysrq_handle_xmon, - .help_msg = "Xmon", - .action_msg = "Entering xmon", -}; -#endif - -void -xmon_map_scc(void) +void xmon_map_scc(void) { #ifdef CONFIG_PPC_MULTIPLATFORM volatile unsigned char __iomem *base; @@ -217,8 +191,6 @@ xmon_map_scc(void) RXRDY = 1; DLAB = 0x80; #endif /* platform */ - - register_sysrq_key('x', &sysrq_xmon_op); } static int scc_initialized = 0; @@ -238,8 +210,7 @@ static inline void do_poll_adb(void) #endif /* CONFIG_ADB_CUDA */ } -int -xmon_write(void *handle, void *ptr, int nb) +int xmon_write(void *ptr, int nb) { char *p = ptr; int i, c, ct; @@ -311,8 +282,7 @@ static unsigned char xmon_shift_keytab[128] = "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */ "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */ -static int -xmon_get_adb_key(void) +static int xmon_get_adb_key(void) { int k, t, on; @@ -350,32 +320,21 @@ xmon_get_adb_key(void) } #endif /* CONFIG_BOOTX_TEXT */ -int -xmon_read(void *handle, void *ptr, int nb) +int xmon_readchar(void) { - char *p = ptr; - int i; - #ifdef CONFIG_BOOTX_TEXT - if (use_screen) { - for (i = 0; i < nb; ++i) - *p++ = xmon_get_adb_key(); - return i; - } + if (use_screen) + return xmon_get_adb_key(); #endif - if (!scc_initialized) - xmon_init_scc(); - for (i = 0; i < nb; ++i) { + if (!scc_initialized) + xmon_init_scc(); while ((*sccc & RXRDY) == 0) - do_poll_adb(); + do_poll_adb(); buf_access(); - *p++ = *sccd; - } - return i; + return *sccd; } -int -xmon_read_poll(void) +int xmon_read_poll(void) { if ((*sccc & RXRDY) == 0) { do_poll_adb(); @@ -395,8 +354,7 @@ static unsigned char scc_inittab[] = { 3, 0xc1, /* rx enable, 8 bits */ }; -void -xmon_init_scc(void) +void xmon_init_scc(void) { if ( _machine == _MACH_chrp ) { @@ -410,6 +368,7 @@ xmon_init_scc(void) else if ( _machine == _MACH_Pmac ) { int i, x; + unsigned long timeout; if (channel_node != 0) pmac_call_feature( @@ -424,8 +383,12 @@ xmon_init_scc(void) PMAC_FTR_MODEM_ENABLE, channel_node, 0, 1); printk(KERN_INFO "Modem powered up by debugger !\n"); - t0 = readtb(); - while (readtb() - t0 < 3*TB_SPEED) + t0 = get_tbl(); + timeout = 3 * tb_ticks_per_sec; + if (timeout == 0) + /* assume 25MHz if tb_ticks_per_sec not set */ + timeout = 75000000; + while (get_tbl() - t0 < timeout) eieio(); } /* use the B channel if requested */ @@ -447,164 +410,19 @@ xmon_init_scc(void) scc_initialized = 1; if (via_modem) { for (;;) { - xmon_write(NULL, "ATE1V1\r", 7); + xmon_write("ATE1V1\r", 7); if (xmon_expect("OK", 5)) { - xmon_write(NULL, "ATA\r", 4); + xmon_write("ATA\r", 4); if (xmon_expect("CONNECT", 40)) break; } - xmon_write(NULL, "+++", 3); + xmon_write("+++", 3); xmon_expect("OK", 3); } } } -void *xmon_stdin; -void *xmon_stdout; -void *xmon_stderr; - -int xmon_putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - xmon_putc('\r', f); - return xmon_write(f, &ch, 1) == 1? c: -1; -} - -int xmon_putchar(int c) -{ - return xmon_putc(c, xmon_stdout); -} - -int xmon_fputs(char *str, void *f) -{ - int n = strlen(str); - - return xmon_write(f, str, n) == n? 0: -1; -} - -int -xmon_readchar(void) -{ - char ch; - - for (;;) { - switch (xmon_read(xmon_stdin, &ch, 1)) { - case 1: - return ch; - case -1: - xmon_printf("read(stdin) returned -1\r\n", 0, 0); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int xmon_expect(const char *str, unsigned int timeout) -{ - int c; - unsigned int t0; - - timeout *= TB_SPEED; - t0 = readtb(); - do { - lineptr = line; - for (;;) { - c = xmon_read_poll(); - if (c == -1) { - if (readtb() - t0 > timeout) - return 0; - continue; - } - if (c == '\n') - break; - if (c != '\r' && lineptr < &line[sizeof(line) - 1]) - *lineptr++ = c; - } - *lineptr = 0; - } while (strstr(line, str) == NULL); - return 1; -} - -int -xmon_getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = xmon_readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - xmon_putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - xmon_putchar('\a'); - else { - xmon_putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - -char * -xmon_fgets(char *str, int nb, void *f) -{ - char *p; - int c; - - for (p = str; p < str + nb - 1; ) { - c = xmon_getchar(); - if (c == -1) { - if (p == str) - return NULL; - break; - } - *p++ = c; - if (c == '\n') - break; - } - *p = 0; - return str; -} - -void -xmon_enter(void) +void xmon_enter(void) { #ifdef CONFIG_ADB_PMU if (_machine == _MACH_Pmac) { @@ -613,8 +431,7 @@ xmon_enter(void) #endif } -void -xmon_leave(void) +void xmon_leave(void) { #ifdef CONFIG_ADB_PMU if (_machine == _MACH_Pmac) { diff --git a/arch/powerpc/xmon/start_64.c b/arch/powerpc/xmon/start_64.c index e50c158191e1..712552c4f242 100644 --- a/arch/powerpc/xmon/start_64.c +++ b/arch/powerpc/xmon/start_64.c @@ -6,182 +6,29 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/sysrq.h> -#include <linux/init.h> #include <asm/machdep.h> -#include <asm/io.h> -#include <asm/page.h> -#include <asm/prom.h> -#include <asm/processor.h> #include <asm/udbg.h> -#include <asm/system.h> #include "nonstdio.h" -#ifdef CONFIG_MAGIC_SYSRQ - -static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, - struct tty_struct *tty) -{ - /* ensure xmon is enabled */ - xmon_init(1); - debugger(pt_regs); -} - -static struct sysrq_key_op sysrq_xmon_op = +void xmon_map_scc(void) { - .handler = sysrq_handle_xmon, - .help_msg = "Xmon", - .action_msg = "Entering xmon", -}; - -static int __init setup_xmon_sysrq(void) -{ - register_sysrq_key('x', &sysrq_xmon_op); - return 0; } -__initcall(setup_xmon_sysrq); -#endif /* CONFIG_MAGIC_SYSRQ */ -int -xmon_write(void *handle, void *ptr, int nb) +int xmon_write(void *ptr, int nb) { return udbg_write(ptr, nb); } -int -xmon_read(void *handle, void *ptr, int nb) +int xmon_readchar(void) { - return udbg_read(ptr, nb); + if (udbg_getc) + return udbg_getc(); + return -1; } -int -xmon_read_poll(void) +int xmon_read_poll(void) { if (udbg_getc_poll) return udbg_getc_poll(); return -1; } - -FILE *xmon_stdin; -FILE *xmon_stdout; - -int -xmon_putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - xmon_putc('\r', f); - return xmon_write(f, &ch, 1) == 1? c: -1; -} - -int -xmon_putchar(int c) -{ - return xmon_putc(c, xmon_stdout); -} - -int -xmon_fputs(char *str, void *f) -{ - int n = strlen(str); - - return xmon_write(f, str, n) == n? 0: -1; -} - -int -xmon_readchar(void) -{ - char ch; - - for (;;) { - switch (xmon_read(xmon_stdin, &ch, 1)) { - case 1: - return ch; - case -1: - xmon_printf("read(stdin) returned -1\r\n", 0, 0); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -xmon_getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = xmon_readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - xmon_putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - xmon_putchar('\a'); - else { - xmon_putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - -char * -xmon_fgets(char *str, int nb, void *f) -{ - char *p; - int c; - - for (p = str; p < str + nb - 1; ) { - c = xmon_getchar(); - if (c == -1) { - if (p == str) - return NULL; - break; - } - *p++ = c; - if (c == '\n') - break; - } - *p = 0; - return str; -} diff --git a/arch/powerpc/xmon/start_8xx.c b/arch/powerpc/xmon/start_8xx.c index a48bd594cf61..4c17b0486ad5 100644 --- a/arch/powerpc/xmon/start_8xx.c +++ b/arch/powerpc/xmon/start_8xx.c @@ -15,273 +15,30 @@ #include <asm/8xx_immap.h> #include <asm/mpc8xx.h> #include <asm/commproc.h> +#include "nonstdio.h" -extern void xmon_printf(const char *fmt, ...); extern int xmon_8xx_write(char *str, int nb); extern int xmon_8xx_read_poll(void); extern int xmon_8xx_read_char(void); -void prom_drawhex(uint); -void prom_drawstring(const char *str); -static int use_screen = 1; /* default */ - -#define TB_SPEED 25000000 - -static inline unsigned int readtb(void) -{ - unsigned int ret; - - asm volatile("mftb %0" : "=r" (ret) :); - return ret; -} - -void buf_access(void) -{ -} - -void -xmon_map_scc(void) +void xmon_map_scc(void) { - cpmp = (cpm8xx_t *)&(((immap_t *)IMAP_ADDR)->im_cpm); - use_screen = 0; - - prom_drawstring("xmon uses serial port\n"); } -static int scc_initialized = 0; - void xmon_init_scc(void); -int -xmon_write(void *handle, void *ptr, int nb) +int xmon_write(void *ptr, int nb) { - char *p = ptr; - int i, c, ct; - - if (!scc_initialized) - xmon_init_scc(); - return(xmon_8xx_write(ptr, nb)); } -int xmon_wants_key; - -int -xmon_read(void *handle, void *ptr, int nb) +int xmon_readchar(void) { - char *p = ptr; - int i; - - if (!scc_initialized) - xmon_init_scc(); - - for (i = 0; i < nb; ++i) { - *p++ = xmon_8xx_read_char(); - } - return i; + return xmon_8xx_read_char(); } -int -xmon_read_poll(void) +int xmon_read_poll(void) { return(xmon_8xx_read_poll()); } - -void -xmon_init_scc() -{ - scc_initialized = 1; -} - -#if 0 -extern int (*prom_entry)(void *); - -int -xmon_exit(void) -{ - struct prom_args { - char *service; - } args; - - for (;;) { - args.service = "exit"; - (*prom_entry)(&args); - } -} -#endif - -void *xmon_stdin; -void *xmon_stdout; -void *xmon_stderr; - -void -xmon_init(void) -{ -} - -int -xmon_putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - xmon_putc('\r', f); - return xmon_write(f, &ch, 1) == 1? c: -1; -} - -int -xmon_putchar(int c) -{ - return xmon_putc(c, xmon_stdout); -} - -int -xmon_fputs(char *str, void *f) -{ - int n = strlen(str); - - return xmon_write(f, str, n) == n? 0: -1; -} - -int -xmon_readchar(void) -{ - char ch; - - for (;;) { - switch (xmon_read(xmon_stdin, &ch, 1)) { - case 1: - return ch; - case -1: - xmon_printf("read(stdin) returned -1\r\n", 0, 0); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -#if 0 -int xmon_expect(const char *str, unsigned int timeout) -{ - int c; - unsigned int t0; - - timeout *= TB_SPEED; - t0 = readtb(); - do { - lineptr = line; - for (;;) { - c = xmon_read_poll(); - if (c == -1) { - if (readtb() - t0 > timeout) - return 0; - continue; - } - if (c == '\n') - break; - if (c != '\r' && lineptr < &line[sizeof(line) - 1]) - *lineptr++ = c; - } - *lineptr = 0; - } while (strstr(line, str) == NULL); - return 1; -} -#endif - -int -xmon_getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = xmon_readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - xmon_putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - xmon_putchar('\b'); - xmon_putchar(' '); - xmon_putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - xmon_putchar('\a'); - else { - xmon_putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - -char * -xmon_fgets(char *str, int nb, void *f) -{ - char *p; - int c; - - for (p = str; p < str + nb - 1; ) { - c = xmon_getchar(); - if (c == -1) { - if (p == str) - return 0; - break; - } - *p++ = c; - if (c == '\n') - break; - } - *p = 0; - return str; -} - -void -prom_drawhex(uint val) -{ - unsigned char buf[10]; - - int i; - for (i = 7; i >= 0; i--) - { - buf[i] = "0123456789abcdef"[val & 0x0f]; - val >>= 4; - } - buf[8] = '\0'; - xmon_fputs(buf, xmon_stdout); -} - -void -prom_drawstring(const char *str) -{ - xmon_fputs(str, xmon_stdout); -} diff --git a/arch/powerpc/xmon/subr_prf.c b/arch/powerpc/xmon/subr_prf.c deleted file mode 100644 index b48738c6dd33..000000000000 --- a/arch/powerpc/xmon/subr_prf.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Written by Cort Dougan to replace the version originally used - * by Paul Mackerras, which came from NetBSD and thus had copyright - * conflicts with Linux. - * - * This file makes liberal use of the standard linux utility - * routines to reduce the size of the binary. We assume we can - * trust some parts of Linux inside the debugger. - * -- Cort (cort@cs.nmt.edu) - * - * Copyright (C) 1999 Cort Dougan. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/module.h> -#include <stdarg.h> -#include "nonstdio.h" - -extern int xmon_write(void *, void *, int); - -void xmon_vfprintf(void *f, const char *fmt, va_list ap) -{ - static char xmon_buf[2048]; - int n; - - n = vsprintf(xmon_buf, fmt, ap); - xmon_write(f, xmon_buf, n); -} - -void xmon_printf(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - xmon_vfprintf(stdout, fmt, ap); - va_end(ap); -} -EXPORT_SYMBOL(xmon_printf); - -void xmon_fprintf(void *f, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - xmon_vfprintf(f, fmt, ap); - va_end(ap); -} - diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 1124f1146202..c45a6ad5f3b7 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1,7 +1,7 @@ /* * Routines providing a simple monitor for use on the PowerMac. * - * Copyright (C) 1996 Paul Mackerras. + * Copyright (C) 1996-2005 Paul Mackerras. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,6 +18,8 @@ #include <linux/kallsyms.h> #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/sysrq.h> +#include <linux/interrupt.h> #include <asm/ptrace.h> #include <asm/string.h> @@ -144,15 +146,10 @@ static void xmon_print_symbol(unsigned long address, const char *mid, static const char *getvecname(unsigned long vec); extern int print_insn_powerpc(unsigned long, unsigned long, int); -extern void printf(const char *fmt, ...); -extern void xmon_vfprintf(void *f, const char *fmt, va_list ap); -extern int xmon_putc(int c, void *f); -extern int putchar(int ch); extern void xmon_enter(void); extern void xmon_leave(void); -extern int xmon_read_poll(void); extern long setjmp(long *); extern void longjmp(long *, long); extern void xmon_save_regs(struct pt_regs *); @@ -748,7 +745,6 @@ cmds(struct pt_regs *excp) printf("%x:", smp_processor_id()); #endif /* CONFIG_SMP */ printf("mon> "); - fflush(stdout); flush_input(); termch = 0; cmd = skipbl(); @@ -1472,17 +1468,23 @@ read_spr(int n) { unsigned int instrs[2]; unsigned long (*code)(void); - unsigned long opd[3]; unsigned long ret = -1UL; +#ifdef CONFIG_PPC64 + unsigned long opd[3]; - instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); - instrs[1] = 0x4e800020; opd[0] = (unsigned long)instrs; opd[1] = 0; opd[2] = 0; + code = (unsigned long (*)(void)) opd; +#else + code = (unsigned long (*)(void)) instrs; +#endif + + /* mfspr r3,n; blr */ + instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; store_inst(instrs); store_inst(instrs+1); - code = (unsigned long (*)(void)) opd; if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; @@ -1504,16 +1506,21 @@ write_spr(int n, unsigned long val) { unsigned int instrs[2]; unsigned long (*code)(unsigned long); +#ifdef CONFIG_PPC64 unsigned long opd[3]; - instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); - instrs[1] = 0x4e800020; opd[0] = (unsigned long)instrs; opd[1] = 0; opd[2] = 0; + code = (unsigned long (*)(unsigned long)) opd; +#else + code = (unsigned long (*)(unsigned long)) instrs; +#endif + + instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); + instrs[1] = 0x4e800020; store_inst(instrs); store_inst(instrs+1); - code = (unsigned long (*)(unsigned long)) opd; if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; @@ -1797,7 +1804,7 @@ memex(void) for(;;){ if (!mnoread) n = mread(adrs, val, size); - printf("%.16x%c", adrs, brev? 'r': ' '); + printf(REG"%c", adrs, brev? 'r': ' '); if (!mnoread) { if (brev) byterev(val, size); @@ -1976,17 +1983,18 @@ prdump(unsigned long adrs, long ndump) nr = mread(adrs, temp, r); adrs += nr; for (m = 0; m < r; ++m) { - if ((m & 7) == 0 && m > 0) - putchar(' '); + if ((m & (sizeof(long) - 1)) == 0 && m > 0) + putchar(' '); if (m < nr) printf("%.2x", temp[m]); else printf("%s", fault_chars[fault_type]); } - if (m <= 8) - printf(" "); - for (; m < 16; ++m) + for (; m < 16; ++m) { + if ((m & (sizeof(long) - 1)) == 0) + putchar(' '); printf(" "); + } printf(" |"); for (m = 0; m < r; ++m) { if (m < nr) { @@ -2151,7 +2159,6 @@ memzcan(void) ok = mread(a, &v, 1); if (ok && !ook) { printf("%.8x .. ", a); - fflush(stdout); } else if (!ok && ook) printf("%.8x\n", a - mskip); ook = ok; @@ -2372,7 +2379,7 @@ int inchar(void) { if (lineptr == NULL || *lineptr == 0) { - if (fgets(line, sizeof(line), stdin) == NULL) { + if (xmon_gets(line, sizeof(line)) == NULL) { lineptr = NULL; return EOF; } @@ -2526,4 +2533,29 @@ void xmon_init(int enable) __debugger_dabr_match = NULL; __debugger_fault_handler = NULL; } + xmon_map_scc(); +} + +#ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + /* ensure xmon is enabled */ + xmon_init(1); + debugger(pt_regs); +} + +static struct sysrq_key_op sysrq_xmon_op = +{ + .handler = sysrq_handle_xmon, + .help_msg = "Xmon", + .action_msg = "Entering xmon", +}; + +static int __init setup_xmon_sysrq(void) +{ + register_sysrq_key('x', &sysrq_xmon_op); + return 0; } +__initcall(setup_xmon_sysrq); +#endif /* CONFIG_MAGIC_SYSRQ */ |