diff options
Diffstat (limited to 'arch/arm26')
119 files changed, 25018 insertions, 0 deletions
diff --git a/arch/arm26/ACKNOWLEDGEMENTS b/arch/arm26/ACKNOWLEDGEMENTS new file mode 100644 index 000000000000..0a17a45110e7 --- /dev/null +++ b/arch/arm26/ACKNOWLEDGEMENTS @@ -0,0 +1,29 @@ +The work in this architecture (ARM26) is that of a great many people. + +This is what has happened: + +I [Ian Molton] have been trying to repair the ARM26 architecture support, but it has become an impossible task whilst it is still merged with the ARM32 (arch/arm) code. The ARM26 code is too different to be sensible to keep with the ARM32 code now, and Russell King really doesnt have the time to maintain the ARM26 code. Add to that that most ARM32 developers dont know about or care about ARM26 when writing patches, and you have a reall mess. + +As a result, I've split it off into a new architecture of its own. I've named it arm26 since these CPUs have only a 26 bit address space, unlike the other ARMs. + +The upheaval in moving around so many source files and chopping out vasty ammounts of cruft was enormous, and the copyright of many files is sometimes unclear. Because of this, I am writing this, in order that no-one is left out / misaccredited / blamed for any of the code. + +People I KNOW have made major contributions to the code: + +David Alan Gilbert (former maintainer of ARM26 bits) +Philip Blundell +Russell King +Keith Owens + +also thanks to Nicholas Pitre for hints, and for the basis or our XIP support. + +Currently maintaing the code are + +Ian Molton (Maintainer / Archimedes) +John Appleby (kernel / A5K) + +If anyone has a problem with attributions in header files / source files, please do contact me to straighten things out. + +Ian Molton (aka spyro) - ARM26 maintainer +spyro@f2s.com + diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig new file mode 100644 index 000000000000..3955de5af4c0 --- /dev/null +++ b/arch/arm26/Kconfig @@ -0,0 +1,227 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux Kernel Configuration" + +config ARM + bool + default y + +config ARM26 + bool + default y + +config MMU + bool + default y + +config ARCH_ACORN + bool + default y + +config CPU_26 + bool + default y + +config FIQ + bool + default y + +# 9 = 512 pages 8 = 256 pages 7 = 128 pages +config FORCE_MAX_ZONEORDER + int + default 9 + +config UID16 + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config GENERIC_BUST_SPINLOCK + bool + +config GENERIC_ISA_DMA + bool + +source "init/Kconfig" + + +menu "System Type" + +comment "Archimedes/A5000 Implementations (select only ONE)" + +config ARCH_ARC + bool "Archimedes" + help + Say Y to support the Acorn Archimedes. + + The Acorn Archimedes was an personal computer based on an 8MHz ARM2 + processor, released in 1987. It supported up to 16MB of RAM in + later models and floppy, harddisc, ethernet etc. + +config ARCH_A5K + bool "A5000" + help + Say Y here to to support the Acorn A5000. + + Linux can support the + internal IDE disk and CD-ROM interface, serial and parallel port, + and the floppy drive. Note that on some A5000s the floppy is + plugged into the wrong socket on the motherboard. + +config PAGESIZE_16 + bool "2MB physical memory (broken)" + help + Say Y here if your Archimedes or A5000 system has only 2MB of + memory, otherwise say N. The resulting kernel will not run on a + machine with 4MB of memory. +endmenu + +menu "General setup" + +# Compressed boot loader in ROM. Yes, we really want to ask about +# TEXT and BSS so we preserve their values in the config files. +config ZBOOT_ROM + bool "Compressed boot loader in ROM/flash" + help + Say Y here if you intend to execute your compressed kernel image (zImage) + directly from ROM or flash. If unsure, say N. + +config ZBOOT_ROM_TEXT + depends on ZBOOT_ROM + hex "Compressed ROM boot loader base address" + default "0" + help + The base address for zImage. Unless you have special requirements, you + should not change this value. + +config ZBOOT_ROM_BSS + depends on ZBOOT_ROM + hex "Compressed ROM boot loader BSS address" + default "0" + help + The base address of 64KiB of read/write memory, which must be available + while the decompressor is running. Unless you have special requirements, + you should not change this value. + +config XIP_KERNEL + bool "Execute In Place (XIP) kernel image" + help + Select this option to create a kernel that can be programed into + the OS ROMs. + +comment "At least one math emulation must be selected" + +config FPE_NWFPE + tristate "NWFPE math emulation" + ---help--- + Say Y to include the NWFPE floating point emulator in the kernel. + This is necessary to run most binaries. Linux does not currently + support floating point hardware so you need to say Y here even if + your machine has an FPA or floating point co-processor podule. + + It is also possible to say M to build the emulator as a module + (nwfpe) or indeed to leave it out altogether. However, unless you + know what you are doing this can easily render your machine + unbootable. Saying Y is the safe option. + + You may say N here if you are going to load the Acorn FPEmulator + early in the bootup. + +source "fs/Kconfig.binfmt" + +config PREEMPT + bool "Preemptible Kernel (EXPERIMENTAL)" + depends on CPU_32 && EXPERIMENTAL + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +config ARTHUR + tristate "RISC OS personality" + depends on CPU_32 + help + Say Y here to include the kernel code necessary if you want to run + Acorn RISC OS/Arthur binaries under Linux. This code is still very + experimental; if this sounds frightening, say N and sleep in peace. + You can also say M here to compile this support as a module (which + will be called arthur). + +config CMDLINE + string "Default kernel command string" + default "" + help + On some architectures (EBSA110 and CATS), there is currently no way + for the boot loader to pass arguments to the kernel. For these + architectures, you should supply some command-line options at build + time by entering them here. As a minimum, you should specify the + memory size and the root device (e.g., mem=64M root=/dev/nfs). + +endmenu + +source "drivers/base/Kconfig" + +source "drivers/parport/Kconfig" + +source "drivers/pnp/Kconfig" + +source "drivers/block/Kconfig" + +source "drivers/md/Kconfig" + +source "net/Kconfig" + +source "drivers/ide/Kconfig" + +source "drivers/scsi/Kconfig" + +source "drivers/isdn/Kconfig" + +# +# input before char - char/joystick depends on it. As does USB. +# +source "drivers/input/Kconfig" + +source "drivers/char/Kconfig" + +source "drivers/media/Kconfig" + +source "fs/Kconfig" + +source "drivers/video/Kconfig" + +if ARCH_ACORN + +source "sound/Kconfig" + +endif + +source "drivers/misc/Kconfig" + +source "drivers/usb/Kconfig" + +source "arch/arm26/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/arm26/Kconfig.debug b/arch/arm26/Kconfig.debug new file mode 100644 index 000000000000..611fc86503fc --- /dev/null +++ b/arch/arm26/Kconfig.debug @@ -0,0 +1,50 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +# RMK wants arm kernels compiled with frame pointers so hardwire this to y. +# If you know what you are doing and are willing to live without stack +# traces, you can get a slightly smaller kernel by setting this option to +# n, but then RMK will have to kill you ;). +config FRAME_POINTER + bool + default y + help + If you say N here, the resulting kernel will be slightly smaller and + faster. However, when a problem occurs with the kernel, the + information that is reported is severely limited. Most people + should say Y here. + +config DEBUG_USER + bool "Verbose user fault messages" + help + When a user program crashes due to an exception, the kernel can + print a brief message explaining what the problem was. This is + sometimes helpful for debugging but serves no purpose on a + production system. Most people should say N here. + +config DEBUG_WAITQ + bool "Wait queue debugging" + depends on DEBUG_KERNEL + +config DEBUG_ERRORS + bool "Verbose kernel error messages" + depends on DEBUG_KERNEL + help + This option controls verbose debugging information which can be + printed when the kernel detects an internal error. This debugging + information is useful to kernel hackers when tracking down problems, + but mostly meaningless to other people. It's safe to say Y unless + you are concerned with the code size or don't want to see these + messages. + +# These options are only for real kernel hackers who want to get their hands dirty. +config DEBUG_LL + bool "Kernel low-level debugging functions" + depends on DEBUG_KERNEL + help + Say Y here to include definitions of printascii, printchar, printhex + in the kernel. This is helpful if you are debugging code that + executes before the console is initialized. + +endmenu diff --git a/arch/arm26/Makefile b/arch/arm26/Makefile new file mode 100644 index 000000000000..ada8985530a5 --- /dev/null +++ b/arch/arm26/Makefile @@ -0,0 +1,118 @@ +# +# arch/arm26/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995-2001 by Russell King +# Copyright (c) 2004 Ian Molton + +LDFLAGS_vmlinux :=-p -X +CPPFLAGS_vmlinux.lds = -DTEXTADDR=$(TEXTADDR) -DDATAADDR=$(DATAADDR) +OBJCOPYFLAGS :=-O binary -R .note -R .comment -S +GZFLAGS :=-9 + +ifeq ($(CONFIG_FRAME_POINTER),y) +CFLAGS +=-fno-omit-frame-pointer -mno-sched-prolog +endif + +ifeq ($(CONFIG_DEBUG_INFO),y) +CFLAGS +=-g +endif + +CFLAGS_BOOT :=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm +CFLAGS +=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm +AFLAGS +=-mapcs-26 -mcpu=arm3 -msoft-float + +ifeq ($(CONFIG_XIP_KERNEL),y) + TEXTADDR := 0x03880000 + DATAADDR := 0x02080000 +else + TEXTADDR := 0x02080000 + DATAADDR := . +endif + +head-y := arch/arm26/kernel/head.o arch/arm26/kernel/init_task.o + +ifeq ($(incdir-y),) +incdir-y := +endif +INCDIR := + +export MACHINE TEXTADDR GZFLAGS CFLAGS_BOOT + +# If we have a machine-specific directory, then include it in the build. +core-y += arch/arm26/kernel/ arch/arm26/mm/ arch/arm26/machine/ +core-$(CONFIG_FPE_NWFPE) += arch/arm26/nwfpe/ + +libs-y += arch/arm26/lib/ + +# Default target when executing plain make +all: zImage + +boot := arch/arm26/boot + +prepare: include/asm-$(ARCH)/asm_offsets.h +CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h + + +.PHONY: maketools FORCE +maketools: FORCE + + +# Convert bzImage to zImage +bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage + +zImage Image bootpImage xipImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +zinstall install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +# We use MRPROPER_FILES and CLEAN_FILES now +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +# My testing targets (that short circuit a few dependencies) +zImg:; $(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage +Img:; $(Q)$(MAKE) $(build)=$(boot) $(boot)/Image +bp:; $(Q)$(MAKE) $(build)=$(boot) $(boot)/bootpImage +i:; $(Q)$(MAKE) $(build)=$(boot) install +zi:; $(Q)$(MAKE) $(build)=$(boot) zinstall + +# +# Configuration targets. Use these to select a +# configuration for your architecture +%_config: + @( \ + CFG=$(@:_config=); \ + if [ -f arch/arm26/def-configs/$$CFG ]; then \ + [ -f .config ] && mv -f .config .config.old; \ + cp arch/arm26/def-configs/$$CFG .config; \ + echo "*** Default configuration for $$CFG installed"; \ + echo "*** Next, you may run 'make oldconfig'"; \ + else \ + echo "$$CFG does not exist"; \ + fi; \ + ) + +arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \ + include/config/MARKER + +include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s + $(call filechk,gen-asm-offsets) + +define archhelp + echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' + echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' + echo ' bootpImage - Combined zImage and initial RAM disk' + echo ' xipImage - eXecute In Place capable image for ROM use (arch/$(ARCH)/boot/xipImage)' + echo ' initrd - Create an initial image' + echo ' install - Install uncompressed kernel' + echo ' zinstall - Install compressed kernel' + echo ' Install using (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' +endef diff --git a/arch/arm26/boot/Makefile b/arch/arm26/boot/Makefile new file mode 100644 index 000000000000..b5c2277654d4 --- /dev/null +++ b/arch/arm26/boot/Makefile @@ -0,0 +1,80 @@ +# +# arch/arm26/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995-2002 Russell King +# + +# Note: the following conditions must always be true: +# ZRELADDR == virt_to_phys(TEXTADDR) +# PARAMS_PHYS must be with 4MB of ZRELADDR +# INITRD_PHYS must be in RAM + + zreladdr-y := 0x02080000 +params_phys-y := 0x0207c000 +initrd_phys-y := 0x02180000 + +ZRELADDR := 0x02080000 +ZTEXTADDR := 0x0207c000 +PARAMS_PHYS := $(params_phys-y) +INITRD_PHYS := 0x02180000 + +# We now have a PIC decompressor implementation. Decompressors running +# from RAM should not define ZTEXTADDR. Decompressors running directly +# from ROM or Flash must define ZTEXTADDR (preferably via the config) +# FIXME: Previous assignment to ztextaddr-y is lost here. See SHARK +ifeq ($(CONFIG_ZBOOT_ROM),y) +ZTEXTADDR := $(CONFIG_ZBOOT_ROM_TEXT) +ZBSSADDR := $(CONFIG_ZBOOT_ROM_BSS) +else +ZTEXTADDR := 0 +ZBSSADDR := ALIGN(4) +endif + +export ZTEXTADDR ZBSSADDR ZRELADDR INITRD_PHYS PARAMS_PHYS + +targets := Image zImage bootpImage xipImage + +$(obj)/Image: vmlinux FORCE + $(call if_changed,objcopy) + @echo ' Kernel: $@ is ready' + +$(obj)/zImage: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + @echo ' Kernel: $@ is ready' + +$(obj)/compressed/vmlinux: vmlinux FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +ifeq ($(CONFIG_XIP_KERNEL),y) +$(obj)/xipImage: vmlinux FORCE +# $(OBJCOPY) -S -O binary -R .data -R .comment vmlinux vmlinux-text.bin +# FIXME - where has .pci_fixup crept in from? + $(OBJCOPY) -S -O binary -R .data -R .pci_fixup -R .comment vmlinux vmlinux-text.bin + $(OBJCOPY) -S -O binary -R .init -R .text -R __ex_table -R .pci_fixup -R __ksymtab -R __ksymtab_gpl -R __kcrctab -R __kcrctab_gpl -R __param -R .comment vmlinux vmlinux-data.bin + cat vmlinux-text.bin vmlinux-data.bin > $@ + $(RM) -f vmlinux-text.bin vmlinux-data.bin + @echo ' Kernel: $@ is ready' +endif + +.PHONY: initrd +initrd: + @test "$(INITRD_PHYS)" != "" || \ + (echo This machine does not support INITRD; exit -1) + @test "$(INITRD)" != "" || \ + (echo You must specify INITRD; exit -1) + +install: $(obj)/Image + $(CONFIG_SHELL) $(obj)/install.sh \ + $(KERNELRELEASE) \ + $(obj)/Image System.map "$(INSTALL_PATH)" + +zinstall: $(obj)/zImage + $(CONFIG_SHELL) $(obj)/install.sh \ + $(KERNELRELEASE) \ + $(obj)/zImage System.map "$(INSTALL_PATH)" + +subdir- := compressed diff --git a/arch/arm26/boot/compressed/Makefile b/arch/arm26/boot/compressed/Makefile new file mode 100644 index 000000000000..b1d9ddebbe74 --- /dev/null +++ b/arch/arm26/boot/compressed/Makefile @@ -0,0 +1,50 @@ +# +# linux/arch/arm26/boot/compressed/Makefile +# +# create a compressed vmlinuz image from the original vmlinux +# +# Note! ZTEXTADDR, ZBSSADDR and ZRELADDR are now exported +# from arch/arm26/boot/Makefile +# + +HEAD = head.o +OBJS = misc.o +FONTC = drivers/video/console/font_acorn_8x8.c + +OBJS += ll_char_wr.o font.o +CFLAGS_misc.o := -DPARAMS_PHYS=$(PARAMS_PHYS) + +targets := vmlinux vmlinux.lds piggy piggy.gz piggy.o font.o head.o $(OBJS) + +SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/LOAD_ADDR/$(ZRELADDR)/;s/BSS_START/$(ZBSSADDR)/ + +EXTRA_CFLAGS := $(CFLAGS_BOOT) -fpic +EXTRA_AFLAGS := -traditional + +LDFLAGS_vmlinux := -p -X \ + $(shell $(CC) $(CFLAGS)) -T + +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \ + $(addprefix $(obj)/, $(OBJS)) FORCE + $(call if_changed,ld) + @: + + +$(obj)/piggy: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/piggy.gz: $(obj)/piggy FORCE + $(call if_changed,gzip) + +LDFLAGS_piggy.o := -r -b binary +$(obj)/piggy.o: $(obj)/piggy.gz FORCE + $(call if_changed,ld) + +$(obj)/font.o: $(FONTC) + $(CC) $(CFLAGS) -Dstatic= -c $(FONTC) -o $(obj)/font.o + +$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in Makefile arch/arm26/boot/Makefile .config + @sed "$(SEDFLAGS)" < $< > $@ + +$(obj)/misc.o: $(obj)/misc.c $(obj)/uncompress.h lib/inflate.c + diff --git a/arch/arm26/boot/compressed/head.S b/arch/arm26/boot/compressed/head.S new file mode 100644 index 000000000000..0307804a6070 --- /dev/null +++ b/arch/arm26/boot/compressed/head.S @@ -0,0 +1,517 @@ +/* + * linux/arch/arm26/boot/compressed/head.S + * + * Copyright (C) 1996-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/linkage.h> + +/* + * Debugging stuff + * + * Note that these macros must not contain any code which is not + * 100% relocatable. Any attempt to do so will result in a crash. + * Please select one of the following when turning on debugging. + */ + + .macro kputc,val + mov r0, \val + bl putc + .endm + + .macro kphex,val,len + mov r0, \val + mov r1, #\len + bl phex + .endm + + .macro debug_reloc_start + .endm + + .macro debug_reloc_end + .endm + + .section ".start", #alloc, #execinstr +/* + * sort out different calling conventions + */ + .align +start: + .type start,#function + .rept 8 + mov r0, r0 + .endr + + b 1f + .word 0x016f2818 @ Magic numbers to help the loader + .word start @ absolute load/run zImage address + .word _edata @ zImage end address +1: mov r7, r1 @ save architecture ID + mov r8, #0 @ save r0 + teqp pc, #0x0c000003 @ turn off interrupts + + .text + adr r0, LC0 + ldmia r0, {r1, r2, r3, r4, r5, r6, ip, sp} + subs r0, r0, r1 @ calculate the delta offset + + teq r0, #0 @ if delta is zero, we're + beq not_relocated @ running at the address we + @ were linked at. + + add r2, r2, r0 @ different address, so we + add r3, r3, r0 @ need to fix up various + add r5, r5, r0 @ pointers. + add r6, r6, r0 + add ip, ip, r0 + add sp, sp, r0 + +1: ldr r1, [r6, #0] @ relocate entries in the GOT + add r1, r1, r0 @ table. This fixes up the + str r1, [r6], #4 @ C references. + cmp r6, ip + blo 1b + +not_relocated: mov r0, #0 +1: str r0, [r2], #4 @ clear bss + str r0, [r2], #4 + str r0, [r2], #4 + str r0, [r2], #4 + cmp r2, r3 + blo 1b + + bl cache_on + + mov r1, sp @ malloc space above stack + add r2, sp, #0x10000 @ 64k max + +/* + * Check to see if we will overwrite ourselves. + * r4 = final kernel address + * r5 = start of this image + * r2 = end of malloc space (and therefore this image) + * We basically want: + * r4 >= r2 -> OK + * r4 + image length <= r5 -> OK + */ + cmp r4, r2 + bhs wont_overwrite + add r0, r4, #4096*1024 @ 4MB largest kernel size + cmp r0, r5 + bls wont_overwrite + + mov r5, r2 @ decompress after malloc space + mov r0, r5 + mov r3, r7 + bl decompress_kernel + + add r0, r0, #127 + bic r0, r0, #127 @ align the kernel length +/* + * r0 = decompressed kernel length + * r1-r3 = unused + * r4 = kernel execution address + * r5 = decompressed kernel start + * r6 = processor ID + * r7 = architecture ID + * r8-r14 = unused + */ + add r1, r5, r0 @ end of decompressed kernel + adr r2, reloc_start + ldr r3, LC1 + add r3, r2, r3 +1: ldmia r2!, {r8 - r13} @ copy relocation code + stmia r1!, {r8 - r13} + ldmia r2!, {r8 - r13} + stmia r1!, {r8 - r13} + cmp r2, r3 + blo 1b + + bl cache_clean_flush + add pc, r5, r0 @ call relocation code + +/* + * We're not in danger of overwriting ourselves. Do this the simple way. + * + * r4 = kernel execution address + * r7 = architecture ID + */ +wont_overwrite: mov r0, r4 + mov r3, r7 + bl decompress_kernel + b call_kernel + + .type LC0, #object +LC0: .word LC0 @ r1 + .word __bss_start @ r2 + .word _end @ r3 + .word _load_addr @ r4 + .word _start @ r5 + .word _got_start @ r6 + .word _got_end @ ip + .word user_stack+4096 @ sp +LC1: .word reloc_end - reloc_start + .size LC0, . - LC0 + +/* + * Turn on the cache. We need to setup some page tables so that we + * can have both the I and D caches on. + * + * We place the page tables 16k down from the kernel execution address, + * and we hope that nothing else is using it. If we're using it, we + * will go pop! + * + * On entry, + * r4 = kernel execution address + * r6 = processor ID + * r7 = architecture number + * r8 = run-time address of "start" + * On exit, + * r1, r2, r3, r8, r9, r12 corrupted + * This routine must preserve: + * r4, r5, r6, r7 + */ + .align 5 +cache_on: mov r3, #8 @ cache_on function + b call_cache_fn + +__setup_mmu: sub r3, r4, #16384 @ Page directory size + bic r3, r3, #0xff @ Align the pointer + bic r3, r3, #0x3f00 +/* + * Initialise the page tables, turning on the cacheable and bufferable + * bits for the RAM area only. + */ + mov r0, r3 + mov r8, r0, lsr #18 + mov r8, r8, lsl #18 @ start of RAM + add r9, r8, #0x10000000 @ a reasonable RAM size + mov r1, #0x12 + orr r1, r1, #3 << 10 + add r2, r3, #16384 +1: cmp r1, r8 @ if virt > start of RAM + orrhs r1, r1, #0x0c @ set cacheable, bufferable + cmp r1, r9 @ if virt > end of RAM + bichs r1, r1, #0x0c @ clear cacheable, bufferable + str r1, [r0], #4 @ 1:1 mapping + add r1, r1, #1048576 + teq r0, r2 + bne 1b +/* + * If ever we are running from Flash, then we surely want the cache + * to be enabled also for our execution instance... We map 2MB of it + * so there is no map overlap problem for up to 1 MB compressed kernel. + * If the execution is in RAM then we would only be duplicating the above. + */ + mov r1, #0x1e + orr r1, r1, #3 << 10 + mov r2, pc, lsr #20 + orr r1, r1, r2, lsl #20 + add r0, r3, r2, lsl #2 + str r1, [r0], #4 + add r1, r1, #1048576 + str r1, [r0] + mov pc, lr + +__armv4_cache_on: + mov r12, lr + bl __setup_mmu + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs + mrc p15, 0, r0, c1, c0, 0 @ read control reg + orr r0, r0, #0x1000 @ I-cache enable + orr r0, r0, #0x0030 + b __common_cache_on + +__arm6_cache_on: + mov r12, lr + bl __setup_mmu + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 + mcr p15, 0, r0, c5, c0, 0 @ invalidate whole TLB v3 + mov r0, #0x30 +__common_cache_on: +#ifndef DEBUG + orr r0, r0, #0x000d @ Write buffer, mmu +#endif + mov r1, #-1 + mcr p15, 0, r3, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c3, c0, 0 @ load domain access control + mcr p15, 0, r0, c1, c0, 0 @ load control register + mov pc, r12 + +/* + * All code following this line is relocatable. It is relocated by + * the above code to the end of the decompressed kernel image and + * executed there. During this time, we have no stacks. + * + * r0 = decompressed kernel length + * r1-r3 = unused + * r4 = kernel execution address + * r5 = decompressed kernel start + * r6 = processor ID + * r7 = architecture ID + * r8-r14 = unused + */ + .align 5 +reloc_start: add r8, r5, r0 + debug_reloc_start + mov r1, r4 +1: + .rept 4 + ldmia r5!, {r0, r2, r3, r9 - r13} @ relocate kernel + stmia r1!, {r0, r2, r3, r9 - r13} + .endr + + cmp r5, r8 + blo 1b + debug_reloc_end + +call_kernel: bl cache_clean_flush + bl cache_off + mov r0, #0 + mov r1, r7 @ restore architecture number + mov pc, r4 @ call kernel + +/* + * Here follow the relocatable cache support functions for the + * various processors. This is a generic hook for locating an + * entry and jumping to an instruction at the specified offset + * from the start of the block. Please note this is all position + * independent code. + * + * r1 = corrupted + * r2 = corrupted + * r3 = block offset + * r6 = corrupted + * r12 = corrupted + */ + +call_cache_fn: adr r12, proc_types + mrc p15, 0, r6, c0, c0 @ get processor ID +1: ldr r1, [r12, #0] @ get value + ldr r2, [r12, #4] @ get mask + eor r1, r1, r6 @ (real ^ match) + tst r1, r2 @ & mask + addeq pc, r12, r3 @ call cache function + add r12, r12, #4*5 + b 1b + +/* + * Table for cache operations. This is basically: + * - CPU ID match + * - CPU ID mask + * - 'cache on' method instruction + * - 'cache off' method instruction + * - 'cache flush' method instruction + * + * We match an entry using: ((real_id ^ match) & mask) == 0 + * + * Writethrough caches generally only need 'on' and 'off' + * methods. Writeback caches _must_ have the flush method + * defined. + */ + .type proc_types,#object +proc_types: + .word 0x41560600 @ ARM6/610 + .word 0xffffffe0 + b __arm6_cache_off @ works, but slow + b __arm6_cache_off + mov pc, lr +@ b __arm6_cache_on @ untested +@ b __arm6_cache_off +@ b __armv3_cache_flush + + .word 0x41007000 @ ARM7/710 + .word 0xfff8fe00 + b __arm7_cache_off + b __arm7_cache_off + mov pc, lr + + .word 0x41807200 @ ARM720T (writethrough) + .word 0xffffff00 + b __armv4_cache_on + b __armv4_cache_off + mov pc, lr + + .word 0x41129200 @ ARM920T + .word 0xff00fff0 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush + + .word 0x4401a100 @ sa110 / sa1100 + .word 0xffffffe0 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush + + .word 0x6901b110 @ sa1110 + .word 0xfffffff0 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush + + .word 0x69050000 @ xscale + .word 0xffff0000 + b __armv4_cache_on + b __armv4_cache_off + b __armv4_cache_flush + + .word 0 @ unrecognised type + .word 0 + mov pc, lr + mov pc, lr + mov pc, lr + + .size proc_types, . - proc_types + +/* + * Turn off the Cache and MMU. ARMv3 does not support + * reading the control register, but ARMv4 does. + * + * On entry, r6 = processor ID + * On exit, r0, r1, r2, r3, r12 corrupted + * This routine must preserve: r4, r6, r7 + */ + .align 5 +cache_off: mov r3, #12 @ cache_off function + b call_cache_fn + +__armv4_cache_off: + mrc p15, 0, r0, c1, c0 + bic r0, r0, #0x000d + mcr p15, 0, r0, c1, c0 @ turn MMU and cache off + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4 + mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4 + mov pc, lr + +__arm6_cache_off: + mov r0, #0x00000030 @ ARM6 control reg. + b __armv3_cache_off + +__arm7_cache_off: + mov r0, #0x00000070 @ ARM7 control reg. + b __armv3_cache_off + +__armv3_cache_off: + mcr p15, 0, r0, c1, c0, 0 @ turn MMU and cache off + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 + mcr p15, 0, r0, c5, c0, 0 @ invalidate whole TLB v3 + mov pc, lr + +/* + * Clean and flush the cache to maintain consistency. + * + * On entry, + * r6 = processor ID + * On exit, + * r1, r2, r3, r12 corrupted + * This routine must preserve: + * r0, r4, r5, r6, r7 + */ + .align 5 +cache_clean_flush: + mov r3, #16 + b call_cache_fn + +__armv4_cache_flush: + bic r1, pc, #31 + add r2, r1, #65536 @ 2x the largest dcache size +1: ldr r12, [r1], #32 @ s/w flush D cache + teq r1, r2 + bne 1b + + mcr p15, 0, r1, c7, c7, 0 @ flush I cache + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mov pc, lr + +__armv3_cache_flush: + mov r1, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 + mov pc, lr + +/* + * Various debugging routines for printing hex characters and + * memory, which again must be relocatable. + */ +#ifdef DEBUG + .type phexbuf,#object +phexbuf: .space 12 + .size phexbuf, . - phexbuf + +phex: adr r3, phexbuf + mov r2, #0 + strb r2, [r3, r1] +1: subs r1, r1, #1 + movmi r0, r3 + bmi puts + and r2, r0, #15 + mov r0, r0, lsr #4 + cmp r2, #10 + addge r2, r2, #7 + add r2, r2, #'0' + strb r2, [r3, r1] + b 1b + +puts: loadsp r3 +1: ldrb r2, [r0], #1 + teq r2, #0 + moveq pc, lr +2: writeb r2 + mov r1, #0x00020000 +3: subs r1, r1, #1 + bne 3b + teq r2, #'\n' + moveq r2, #'\r' + beq 2b + teq r0, #0 + bne 1b + mov pc, lr +putc: + mov r2, r0 + mov r0, #0 + loadsp r3 + b 2b + +memdump: mov r12, r0 + mov r10, lr + mov r11, #0 +2: mov r0, r11, lsl #2 + add r0, r0, r12 + mov r1, #8 + bl phex + mov r0, #':' + bl putc +1: mov r0, #' ' + bl putc + ldr r0, [r12, r11, lsl #2] + mov r1, #8 + bl phex + and r0, r11, #7 + teq r0, #3 + moveq r0, #' ' + bleq putc + and r0, r11, #7 + add r11, r11, #1 + teq r0, #7 + bne 1b + mov r0, #'\n' + bl putc + cmp r11, #64 + blt 2b + mov pc, r10 +#endif + +reloc_end: + + .align + .section ".stack", "aw" +user_stack: .space 4096 diff --git a/arch/arm26/boot/compressed/hw-bse.c b/arch/arm26/boot/compressed/hw-bse.c new file mode 100644 index 000000000000..3e8f07f8e08a --- /dev/null +++ b/arch/arm26/boot/compressed/hw-bse.c @@ -0,0 +1,74 @@ +/* + * Bright Star Engineering Inc. + * + * code for readng parameters from the + * parameter blocks of the boot block + * flash memory + * + */ + +static int strcmp(const char *s1, const char *s2) +{ + while (*s1 != '\0' && *s1 == *s2) + { + s1++; + s2++; + } + + return (*(unsigned char *) s1) - (*(unsigned char *) s2); +} + +struct pblk_t { + char type; + unsigned short size; +}; + +static char *bse_getflashparam(char *name) { + unsigned int esize; + char *q,*r; + unsigned char *p,*e; + struct pblk_t *thepb = (struct pblk_t *) 0x00004000; + struct pblk_t *altpb = (struct pblk_t *) 0x00006000; + if (thepb->type&1) { + if (altpb->type&1) { + /* no valid param block */ + return (char*)0; + } else { + /* altpb is valid */ + struct pblk_t *tmp; + tmp = thepb; + thepb = altpb; + altpb = tmp; + } + } + p = (char*)thepb + sizeof(struct pblk_t); + e = p + thepb->size; + while (p < e) { + q = p; + esize = *p; + if (esize == 0xFF) break; + if (esize == 0) break; + if (esize > 127) { + esize = (esize&0x7F)<<8 | p[1]; + q++; + } + q++; + r=q; + if (*r && ((name == 0) || (!strcmp(name,r)))) { + while (*q++) ; + return q; + } + p+=esize; + } + return (char*)0; +} + +void bse_setup(void) { + /* extract the linux cmdline from flash */ + char *name=bse_getflashparam("linuxboot"); + char *x = (char *)0xc0000100; + if (name) { + while (*name) *x++=*name++; + } + *x=0; +} diff --git a/arch/arm26/boot/compressed/ll_char_wr.S b/arch/arm26/boot/compressed/ll_char_wr.S new file mode 100644 index 000000000000..f024c3ebdfa5 --- /dev/null +++ b/arch/arm26/boot/compressed/ll_char_wr.S @@ -0,0 +1,162 @@ +/* + * linux/arch/arm26/lib/ll_char_wr.S + * + * Copyright (C) 1995, 1996 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Speedups & 1bpp code (C) 1996 Philip Blundell & Russell King. + * + * 10-04-96 RMK Various cleanups & reduced register usage. + * 08-04-98 RMK Shifts re-ordered + */ + +@ Regs: [] = corruptible +@ {} = used +@ () = do not use + +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +#define BOLD 0x01 +#define ITALIC 0x02 +#define UNDERLINE 0x04 +#define FLASH 0x08 +#define INVERSE 0x10 + +LC0: .word bytes_per_char_h + .word video_size_row + .word acorndata_8x8 + .word con_charconvtable + +ENTRY(ll_write_char) + stmfd sp!, {r4 - r7, lr} +@ +@ Smashable regs: {r0 - r3}, [r4 - r7], (r8 - fp), [ip], (sp), [lr], (pc) +@ + eor ip, r1, #UNDERLINE << 9 +/* + * calculate colours + */ + tst r1, #INVERSE << 9 + moveq r2, r1, lsr #16 + moveq r3, r1, lsr #24 + movne r2, r1, lsr #24 + movne r3, r1, lsr #16 + and r3, r3, #255 + and r2, r2, #255 +/* + * calculate offset into character table + */ + mov r1, r1, lsl #23 + mov r1, r1, lsr #20 +/* + * calculate offset required for each row [maybe I should make this an argument to this fn. + * Have to see what the register usage is like in the calling routines. + */ + adr r4, LC0 + ldmia r4, {r4, r5, r6, lr} + ldr r4, [r4] + ldr r5, [r5] +/* + * Go to resolution-dependent routine... + */ + cmp r4, #4 + blt Lrow1bpp + eor r2, r3, r2 @ Create eor mask to change colour from bg + orr r3, r3, r3, lsl #8 @ to fg. + orr r3, r3, r3, lsl #16 + add r0, r0, r5, lsl #3 @ Move to bottom of character + add r1, r1, #7 + ldrb r7, [r6, r1] + tst ip, #UNDERLINE << 9 + eoreq r7, r7, #255 + teq r4, #8 + beq Lrow8bpplp +@ +@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) +@ + orr r3, r3, r3, lsl #4 +Lrow4bpplp: ldr r7, [lr, r7, lsl #2] + mul r7, r2, r7 + tst r1, #7 @ avoid using r7 directly after + eor ip, r3, r7 + str ip, [r0, -r5]! + LOADREGS(eqfd, sp!, {r4 - r7, pc}) + sub r1, r1, #1 + ldrb r7, [r6, r1] + ldr r7, [lr, r7, lsl #2] + mul r7, r2, r7 + tst r1, #7 @ avoid using r7 directly after + eor ip, r3, r7 + str ip, [r0, -r5]! + subne r1, r1, #1 + ldrneb r7, [r6, r1] + bne Lrow4bpplp + LOADREGS(fd, sp!, {r4 - r7, pc}) + +@ +@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) +@ +Lrow8bpplp: mov ip, r7, lsr #4 + ldr ip, [lr, ip, lsl #2] + mul r4, r2, ip + and ip, r7, #15 @ avoid r4 + ldr ip, [lr, ip, lsl #2] @ avoid r4 + mul ip, r2, ip @ avoid r4 + eor r4, r3, r4 @ avoid ip + tst r1, #7 @ avoid ip + sub r0, r0, r5 @ avoid ip + eor ip, r3, ip + stmia r0, {r4, ip} + LOADREGS(eqfd, sp!, {r4 - r7, pc}) + sub r1, r1, #1 + ldrb r7, [r6, r1] + mov ip, r7, lsr #4 + ldr ip, [lr, ip, lsl #2] + mul r4, r2, ip + and ip, r7, #15 @ avoid r4 + ldr ip, [lr, ip, lsl #2] @ avoid r4 + mul ip, r2, ip @ avoid r4 + eor r4, r3, r4 @ avoid ip + tst r1, #7 @ avoid ip + sub r0, r0, r5 @ avoid ip + eor ip, r3, ip + stmia r0, {r4, ip} + subne r1, r1, #1 + ldrneb r7, [r6, r1] + bne Lrow8bpplp + LOADREGS(fd, sp!, {r4 - r7, pc}) + +@ +@ Smashable regs: {r0 - r3}, [r4], {r5, r6}, [r7], (r8 - fp), [ip], (sp), [lr], (pc) +@ +Lrow1bpp: add r6, r6, r1 + ldmia r6, {r4, r7} + tst ip, #INVERSE << 9 + mvnne r4, r4 + mvnne r7, r7 + strb r4, [r0], r5 + mov r4, r4, lsr #8 + strb r4, [r0], r5 + mov r4, r4, lsr #8 + strb r4, [r0], r5 + mov r4, r4, lsr #8 + strb r4, [r0], r5 + strb r7, [r0], r5 + mov r7, r7, lsr #8 + strb r7, [r0], r5 + mov r7, r7, lsr #8 + strb r7, [r0], r5 + mov r7, r7, lsr #8 + tst ip, #UNDERLINE << 9 + mvneq r7, r7 + strb r7, [r0], r5 + LOADREGS(fd, sp!, {r4 - r7, pc}) + + .bss +ENTRY(con_charconvtable) + .space 1024 diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c new file mode 100644 index 000000000000..f17f50e5516f --- /dev/null +++ b/arch/arm26/boot/compressed/misc.c @@ -0,0 +1,316 @@ +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * + * Modified for ARM Linux by Russell King + * + * Nicolas Pitre <nico@visuaide.com> 1999/04/14 : + * For this code to run directly from Flash, all constant variables must + * be marked with 'const' and all other variables initialized at run-time + * only. This way all non constant variables will end up in the bss segment, + * which should point to addresses in RAM and cleared to 0 on start. + * This allows for a much quicker boot time. + */ + +unsigned int __machine_arch_type; + +#include <linux/kernel.h> + +#include <asm/uaccess.h> +#include "uncompress.h" + +#ifdef STANDALONE_DEBUG +#define puts printf +#endif + +#define __ptr_t void * + +/* + * Optimised C version of memzero for the ARM. + */ +void __memzero (__ptr_t s, size_t n) +{ + union { void *vp; unsigned long *ulp; unsigned char *ucp; } u; + int i; + + u.vp = s; + + for (i = n >> 5; i > 0; i--) { + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + } + + if (n & 1 << 4) { + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + *u.ulp++ = 0; + } + + if (n & 1 << 3) { + *u.ulp++ = 0; + *u.ulp++ = 0; + } + + if (n & 1 << 2) + *u.ulp++ = 0; + + if (n & 1 << 1) { + *u.ucp++ = 0; + *u.ucp++ = 0; + } + + if (n & 1) + *u.ucp++ = 0; +} + +static inline __ptr_t memcpy(__ptr_t __dest, __const __ptr_t __src, + size_t __n) +{ + int i = 0; + unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src; + + for (i = __n >> 3; i > 0; i--) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + + if (__n & 1 << 2) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + + if (__n & 1 << 1) { + *d++ = *s++; + *d++ = *s++; + } + + if (__n & 1) + *d++ = *s++; + + return __dest; +} + +/* + * gzip delarations + */ +#define OF(args) args +#define STATIC static + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define WSIZE 0x8000 /* Window size must be at least 32k, */ + /* and a power of two */ + +static uch *inbuf; /* input buffer */ +static uch window[WSIZE]; /* Sliding window buffer */ + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +static int fill_inbuf(void); +static void flush_window(void); +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +extern char input_data[]; +extern char input_data_end[]; + +static uch *output_data; +static ulg output_ptr; +static ulg bytes_out; + +static void *malloc(int size); +static void free(void *where); +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +static void puts(const char *); + +extern int end; +static ulg free_mem_ptr; +static ulg free_mem_ptr_end; + +#define HEAP_SIZE 0x2000 + +#include "../../../../lib/inflate.c" + +#ifndef STANDALONE_DEBUG +static void *malloc(int size) +{ + void *p; + + if (size <0) error("Malloc error"); + if (free_mem_ptr <= 0) error("Memory error"); + + free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ + + p = (void *)free_mem_ptr; + free_mem_ptr += size; + + if (free_mem_ptr >= free_mem_ptr_end) + error("Out of memory"); + return p; +} + +static void free(void *where) +{ /* gzip_mark & gzip_release do the free */ +} + +static void gzip_mark(void **ptr) +{ + arch_decomp_wdog(); + *ptr = (void *) free_mem_ptr; +} + +static void gzip_release(void **ptr) +{ + arch_decomp_wdog(); + free_mem_ptr = (long) *ptr; +} +#else +static void gzip_mark(void **ptr) +{ +} + +static void gzip_release(void **ptr) +{ +} +#endif + +/* =========================================================================== + * Fill the input buffer. This is called only when the buffer is empty + * and at least one byte is really needed. + */ +int fill_inbuf(void) +{ + if (insize != 0) + error("ran out of input data"); + + inbuf = input_data; + insize = &input_data_end[0] - &input_data[0]; + + inptr = 1; + return inbuf[0]; +} + +/* =========================================================================== + * Write the output window window[0..outcnt-1] and update crc and bytes_out. + * (Used for the decompressed data only.) + */ +void flush_window(void) +{ + ulg c = crc; + unsigned n; + uch *in, *out, ch; + + in = window; + out = &output_data[output_ptr]; + for (n = 0; n < outcnt; n++) { + ch = *out++ = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + output_ptr += (ulg)outcnt; + outcnt = 0; + puts("."); +} + +static void error(char *x) +{ + int ptr; + + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + while(1); /* Halt */ +} + +#ifndef STANDALONE_DEBUG + +ulg +decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p, + int arch_id) +{ + output_data = (uch *)output_start; /* Points to kernel start */ + free_mem_ptr = free_mem_ptr_p; + free_mem_ptr_end = free_mem_ptr_end_p; + __machine_arch_type = arch_id; + + arch_decomp_setup(); + + makecrc(); + puts("Uncompressing Linux..."); + gunzip(); + puts(" done, booting the kernel.\n"); + return output_ptr; +} +#else + +char output_buffer[1500*1024]; + +int main() +{ + output_data = output_buffer; + + makecrc(); + puts("Uncompressing Linux..."); + gunzip(); + puts("done.\n"); + return 0; +} +#endif + diff --git a/arch/arm26/boot/compressed/uncompress.h b/arch/arm26/boot/compressed/uncompress.h new file mode 100644 index 000000000000..66d9b938a7a4 --- /dev/null +++ b/arch/arm26/boot/compressed/uncompress.h @@ -0,0 +1,110 @@ +/* + * + * Copyright (C) 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define VIDMEM ((char *)0x02000000) + +int video_num_columns, video_num_lines, video_size_row; +int white, bytes_per_char_h; +extern unsigned long con_charconvtable[256]; + +struct param_struct { + unsigned long page_size; + unsigned long nr_pages; + unsigned long ramdisk_size; + unsigned long mountrootrdonly; + unsigned long rootdev; + unsigned long video_num_cols; + unsigned long video_num_rows; + unsigned long video_x; + unsigned long video_y; + unsigned long memc_control_reg; + unsigned char sounddefault; + unsigned char adfsdrives; + unsigned char bytes_per_char_h; + unsigned char bytes_per_char_v; + unsigned long unused[256/4-11]; +}; + +static struct param_struct *params = (struct param_struct *)0x0207c000; + +/* + * This does not append a newline + */ +static void puts(const char *s) +{ + extern void ll_write_char(char *, unsigned long); + int x,y; + unsigned char c; + char *ptr; + + x = params->video_x; + y = params->video_y; + + while ( ( c = *(unsigned char *)s++ ) != '\0' ) { + if ( c == '\n' ) { + x = 0; + if ( ++y >= video_num_lines ) { + y--; + } + } else { + ptr = VIDMEM + ((y*video_num_columns*params->bytes_per_char_v+x)*bytes_per_char_h); + ll_write_char(ptr, c|(white<<16)); + if ( ++x >= video_num_columns ) { + x = 0; + if ( ++y >= video_num_lines ) { + y--; + } + } + } + } + + params->video_x = x; + params->video_y = y; +} + +static void error(char *x); + +/* + * Setup for decompression + */ +static void arch_decomp_setup(void) +{ + int i; + + video_num_lines = params->video_num_rows; + video_num_columns = params->video_num_cols; + bytes_per_char_h = params->bytes_per_char_h; + video_size_row = video_num_columns * bytes_per_char_h; + if (bytes_per_char_h == 4) + for (i = 0; i < 256; i++) + con_charconvtable[i] = + (i & 128 ? 1 << 0 : 0) | + (i & 64 ? 1 << 4 : 0) | + (i & 32 ? 1 << 8 : 0) | + (i & 16 ? 1 << 12 : 0) | + (i & 8 ? 1 << 16 : 0) | + (i & 4 ? 1 << 20 : 0) | + (i & 2 ? 1 << 24 : 0) | + (i & 1 ? 1 << 28 : 0); + else + for (i = 0; i < 16; i++) + con_charconvtable[i] = + (i & 8 ? 1 << 0 : 0) | + (i & 4 ? 1 << 8 : 0) | + (i & 2 ? 1 << 16 : 0) | + (i & 1 ? 1 << 24 : 0); + + white = bytes_per_char_h == 8 ? 0xfc : 7; + + if (params->nr_pages * params->page_size < 4096*1024) error("<4M of mem\n"); +} + +/* + * nothing to do + */ +#define arch_decomp_wdog() diff --git a/arch/arm26/boot/compressed/vmlinux.lds.in b/arch/arm26/boot/compressed/vmlinux.lds.in new file mode 100644 index 000000000000..86d821d5ab70 --- /dev/null +++ b/arch/arm26/boot/compressed/vmlinux.lds.in @@ -0,0 +1,60 @@ +/* + * linux/arch/arm26/boot/compressed/vmlinux.lds.in + * + * Copyright (C) 2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +OUTPUT_ARCH(arm) +ENTRY(_start) +SECTIONS +{ + . = LOAD_ADDR; + _load_addr = .; + + . = TEXT_START; + _text = .; + + .text : { + _start = .; + *(.start) + *(.text) + *(.fixup) + *(.gnu.warning) + *(.rodata) + *(.rodata.*) + *(.glue_7) + *(.glue_7t) + input_data = .; + arch/arm26/boot/compressed/piggy.o + input_data_end = .; + . = ALIGN(4); + } + + _etext = .; + + _got_start = .; + .got : { *(.got) } + _got_end = .; + .got.plt : { *(.got.plt) } + .data : { *(.data) } + _edata = .; + + . = BSS_START; + __bss_start = .; + .bss : { *(.bss) } + _end = .; + + .stack (NOLOAD) : { *(.stack) } + + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} + diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh new file mode 100644 index 000000000000..c628328dd9ec --- /dev/null +++ b/arch/arm26/boot/install.sh @@ -0,0 +1,62 @@ +#!/bin/sh +# +# arch/arm26/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# Adapted from code in arch/i386/boot/install.sh by Russell King +# Stolen from arm32 by Ian Molton +# +# "make install" script for arm architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +# User may have a custom install script + +if [ -x /sbin/installkernel ]; then + exec /sbin/installkernel "$@" +fi + +if [ "$2" = "zImage" ]; then +# Compressed install + echo "Installing compressed kernel" + if [ -f $4/vmlinuz-$1 ]; then + mv $4/vmlinuz-$1 $4/vmlinuz.old + fi + + if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System.old + fi + + cat $2 > $4/vmlinuz-$1 + cp $3 $4/System.map-$1 +else +# Normal install + echo "Installing normal kernel" + if [ -f $4/vmlinux-$1 ]; then + mv $4/vmlinux-$1 $4/vmlinux.old + fi + + if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old + fi + + cat $2 > $4/vmlinux-$1 + cp $3 $4/System.map +fi + +if [ -x /sbin/loadmap ]; then + /sbin/loadmap --rdev /dev/ima +else + echo "You have to install it yourself" +fi diff --git a/arch/arm26/defconfig b/arch/arm26/defconfig new file mode 100644 index 000000000000..c4a89703c3d8 --- /dev/null +++ b/arch/arm26/defconfig @@ -0,0 +1,362 @@ +# +# Automatically generated by make menuconfig: don't edit +# +CONFIG_ARM=y +# CONFIG_EISA is not set +# CONFIG_SBUS is not set +# CONFIG_MCA is not set +CONFIG_UID16=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +# CONFIG_GENERIC_BUST_SPINLOCK is not set +# CONFIG_GENERIC_ISA_DMA is not set + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# General setup +# +# CONFIG_NET is not set +# CONFIG_SYSVIPC is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_SYSCTL is not set + +# +# Loadable module support +# +# CONFIG_MODULES is not set + +# +# System Type +# +CONFIG_ARCH_ARC=y +# CONFIG_ARCH_A5K is not set +CONFIG_ARCH_ACORN=y +# CONFIG_CPU_32 is not set +CONFIG_CPU_26=y +# CONFIG_PAGESIZE_16 is not set + +# +# General setup +# +CONFIG_FIQ=y +# CONFIG_ZBOOT_ROM is not set +CONFIG_ZBOOT_ROM_TEXT=0 +CONFIG_ZBOOT_ROM_BSS=0 +CONFIG_FPE_NWFPE=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_ELF is not set +# CONFIG_BINFMT_MISC is not set +CONFIG_CMDLINE="" +# CONFIG_ALIGNMENT_TRAP is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set +# CONFIG_ISAPNP is not set +# CONFIG_PNPBIOS is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_CISS_SCSI_TAPE is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_BLK_DEV_INITRD is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set +# CONFIG_BLK_DEV_MD is not set +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID5 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_BLK_DEV_LVM is not set + +# +# Acorn-specific block devices +# +# CONFIG_BLK_DEV_FD1772 is not set +# CONFIG_BLK_DEV_MFM is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set + +# +# ISDN subsystem +# + +# +# Input device support +# +# CONFIG_INPUT is not set +# CONFIG_INPUT_KEYBDEV is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_TSLIBDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set +# CONFIG_INPUT_UINPUT is not set +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_VORTEX is not set +# CONFIG_GAMEPORT_FM801 is not set +# CONFIG_GAMEPORT_CS461x is not set +# CONFIG_SERIO is not set +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_ACORN is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set +# CONFIG_SERIAL_8250_CONSOLE is not set +# CONFIG_SERIAL_8250_CS is not set +# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250_MANY_PORTS is not set +# CONFIG_SERIAL_8250_SHARE_IRQ is not set +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set +# CONFIG_ATOMWIDE_SERIAL is not set +# CONFIG_DUALSP_SERIAL is not set +# CONFIG_SERIAL_AMBA is not set +# CONFIG_SERIAL_AMBA_CONSOLE is not set +# CONFIG_SERIAL_CLPS711X is not set +# CONFIG_SERIAL_CLPS711X_CONSOLE is not set +# CONFIG_SERIAL_CLPS711X_OLD_NAME is not set +# CONFIG_SERIAL_21285 is not set +# CONFIG_SERIAL_21285_OLD is not set +# CONFIG_SERIAL_21285_CONSOLE is not set +# CONFIG_SERIAL_UART00 is not set +# CONFIG_SERIAL_UART00_CONSOLE is not set +# CONFIG_SERIAL_SA1100 is not set +# CONFIG_SERIAL_SA1100_CONSOLE is not set +# CONFIG_UNIX98_PTYS is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_ALGOBIT=y +CONFIG_I2C_ALGOPCF=y +# CONFIG_I2C_ELEKTOR is not set +CONFIG_I2C_CHARDEV=y +# CONFIG_I2C_PROC is not set + +# +# L3 serial bus support +# +# CONFIG_L3 is not set +# CONFIG_L3_ALGOBIT is not set +# CONFIG_L3_BIT_SA1100_GPIO is not set +# CONFIG_L3_SA1111 is not set +# CONFIG_BIT_SA1100_GPIO is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +# CONFIG_PSMOUSE is not set +# CONFIG_QIC02_TAPE is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_QFMT_V1 is not set +# CONFIG_QFMT_V2 is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_JBD_DEBUG is not set +# CONFIG_FAT_FS is not set +# CONFIG_MSDOS_FS is not set +# CONFIG_UMSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_TMPFS is not set +CONFIG_RAMFS=y +# CONFIG_ISO9660_FS is not set +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_JFS_FS is not set +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_DEBUG is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +# CONFIG_DEVPTS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_SMB_FS is not set +# CONFIG_ZISOFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +CONFIG_ACORN_PARTITION=y +# CONFIG_ACORN_PARTITION_EESOX is not set +# CONFIG_ACORN_PARTITION_ICS is not set +CONFIG_ACORN_PARTITION_ADFS=y +# CONFIG_ACORN_PARTITION_POWERTEC is not set +CONFIG_ACORN_PARTITION_RISCIX=y +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SMB_NLS is not set +# CONFIG_NLS is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# Multimedia Capabilities Port drivers +# +# CONFIG_MCP is not set +# CONFIG_MCP_SA1100 is not set +# CONFIG_MCP_UCB1200 is not set +# CONFIG_MCP_UCB1200_AUDIO is not set +# CONFIG_MCP_UCB1200_TS is not set + +# +# Console Switches +# +# CONFIG_SWITCHES is not set +# CONFIG_SWITCHES_SA1100 is not set +# CONFIG_SWITCHES_UCB1X00 is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# Kernel hacking +# +# CONFIG_NO_FRAME_POINTER is not set +CONFIG_DEBUG_USER=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SLAB=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_WAITQ=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_ERRORS=y +CONFIG_DEBUG_LL=y + +# +# Security options +# +CONFIG_SECURITY_CAPABILITIES=y + +# +# Library routines +# +CONFIG_CRC32=y +# CONFIG_ZLIB_INFLATE is not set +# CONFIG_ZLIB_DEFLATE is not set diff --git a/arch/arm26/kernel/Makefile b/arch/arm26/kernel/Makefile new file mode 100644 index 000000000000..ee9fb49fdb78 --- /dev/null +++ b/arch/arm26/kernel/Makefile @@ -0,0 +1,17 @@ +# +# Makefile for the linux kernel. +# + +# Object file lists. + +AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR) + +obj-y := compat.o dma.o entry.o irq.o process.o ptrace.o \ + semaphore.o setup.o signal.o sys_arm.o time.o traps.o \ + ecard.o dma.o ecard.o fiq.o time.o + +extra-y := head.o init_task.o vmlinux.lds + +obj-$(CONFIG_FIQ) += fiq.o +obj-$(CONFIG_MODULES) += armksyms.o + diff --git a/arch/arm26/kernel/armksyms.c b/arch/arm26/kernel/armksyms.c new file mode 100644 index 000000000000..35514b398e2e --- /dev/null +++ b/arch/arm26/kernel/armksyms.c @@ -0,0 +1,220 @@ +/* + * linux/arch/arm26/kernel/armksyms.c + * + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/user.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/delay.h> +#include <linux/in6.h> +#include <linux/interrupt.h> +#include <linux/pm.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> +#include <linux/smp_lock.h> +#include <linux/syscalls.h> + +#include <asm/byteorder.h> +#include <asm/elf.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/semaphore.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> +#include <asm/mach-types.h> + +extern void dump_thread(struct pt_regs *, struct user *); +extern int dump_fpu(struct pt_regs *, struct user_fp_struct *); +extern void inswb(unsigned int port, void *to, int len); +extern void outswb(unsigned int port, const void *to, int len); + +extern void __bad_xchg(volatile void *ptr, int size); + +/* + * libgcc functions - functions that are used internally by the + * compiler... (prototypes are not correct though, but that + * doesn't really matter since they're not versioned). + */ +extern void __ashldi3(void); +extern void __ashrdi3(void); +extern void __divsi3(void); +extern void __lshrdi3(void); +extern void __modsi3(void); +extern void __muldi3(void); +extern void __ucmpdi2(void); +extern void __udivdi3(void); +extern void __umoddi3(void); +extern void __udivmoddi4(void); +extern void __udivsi3(void); +extern void __umodsi3(void); +extern void abort(void); + +extern void ret_from_exception(void); +extern void fpundefinstr(void); +extern void fp_enter(void); + +/* + * This has a special calling convention; it doesn't + * modify any of the usual registers, except for LR. + * FIXME - we used to use our own local version - looks to be in kernel/softirq now + */ +//extern void __do_softirq(void); + +#define EXPORT_SYMBOL_ALIAS(sym,orig) \ + const char __kstrtab_##sym[] \ + __attribute__((section(".kstrtab"))) = \ + __MODULE_STRING(sym); \ + const struct module_symbol __ksymtab_##sym \ + __attribute__((section("__ksymtab"))) = \ + { (unsigned long)&orig, __kstrtab_##sym }; + +/* + * floating point math emulator support. + * These symbols will never change their calling convention... + */ +EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter); +EXPORT_SYMBOL_ALIAS(fp_printk,printk); +EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig); + +EXPORT_SYMBOL(fpundefinstr); +EXPORT_SYMBOL(ret_from_exception); + +#ifdef CONFIG_VT +EXPORT_SYMBOL(kd_mksound); +#endif + +//EXPORT_SYMBOL(__do_softirq); + + /* platform dependent support */ +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(udelay); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(system_rev); +EXPORT_SYMBOL(system_serial_low); +EXPORT_SYMBOL(system_serial_high); +#ifdef CONFIG_DEBUG_BUGVERBOSE +EXPORT_SYMBOL(__bug); +#endif +EXPORT_SYMBOL(__bad_xchg); +EXPORT_SYMBOL(__readwrite_bug); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(set_irq_type); +EXPORT_SYMBOL(pm_idle); +EXPORT_SYMBOL(pm_power_off); + + /* processor dependencies */ +EXPORT_SYMBOL(__machine_arch_type); + + /* networking */ +EXPORT_SYMBOL(csum_partial_copy_nocheck); +EXPORT_SYMBOL(__csum_ipv6_magic); + + /* io */ +#ifndef __raw_readsb +EXPORT_SYMBOL(__raw_readsb); +#endif +#ifndef __raw_readsw +EXPORT_SYMBOL(__raw_readsw); +#endif +#ifndef __raw_readsl +EXPORT_SYMBOL(__raw_readsl); +#endif +#ifndef __raw_writesb +EXPORT_SYMBOL(__raw_writesb); +#endif +#ifndef __raw_writesw +EXPORT_SYMBOL(__raw_writesw); +#endif +#ifndef __raw_writesl +EXPORT_SYMBOL(__raw_writesl); +#endif + + /* string / mem functions */ +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(__memzero); + + /* user mem (segment) */ +EXPORT_SYMBOL(uaccess_kernel); +EXPORT_SYMBOL(uaccess_user); + +EXPORT_SYMBOL(__get_user_1); +EXPORT_SYMBOL(__get_user_2); +EXPORT_SYMBOL(__get_user_4); +EXPORT_SYMBOL(__get_user_8); + +EXPORT_SYMBOL(__put_user_1); +EXPORT_SYMBOL(__put_user_2); +EXPORT_SYMBOL(__put_user_4); +EXPORT_SYMBOL(__put_user_8); + + /* gcc lib functions */ +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__divsi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__modsi3); +EXPORT_SYMBOL(__muldi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__udivdi3); +EXPORT_SYMBOL(__umoddi3); +EXPORT_SYMBOL(__udivmoddi4); +EXPORT_SYMBOL(__udivsi3); +EXPORT_SYMBOL(__umodsi3); + + /* bitops */ +EXPORT_SYMBOL(_set_bit_le); +EXPORT_SYMBOL(_test_and_set_bit_le); +EXPORT_SYMBOL(_clear_bit_le); +EXPORT_SYMBOL(_test_and_clear_bit_le); +EXPORT_SYMBOL(_change_bit_le); +EXPORT_SYMBOL(_test_and_change_bit_le); +EXPORT_SYMBOL(_find_first_zero_bit_le); +EXPORT_SYMBOL(_find_next_zero_bit_le); + + /* elf */ +EXPORT_SYMBOL(elf_platform); +EXPORT_SYMBOL(elf_hwcap); + + /* syscalls */ +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_read); +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_open); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_wait4); + +EXPORT_SYMBOL(get_wchan); + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(kernel_flag); +#endif diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c new file mode 100644 index 000000000000..4ccacaef94df --- /dev/null +++ b/arch/arm26/kernel/asm-offsets.c @@ -0,0 +1,63 @@ +/* + * Copyright (C) 1995-2001 Russell King + * 2001-2002 Keith Owens + * 2003 Ian Molton + * + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> + +/* + * Make sure that the compiler and target are compatible. + */ +#if defined(__APCS_32__) && defined(CONFIG_CPU_26) +#error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26 +#endif +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#error Sorry, your compiler is known to miscompile kernels. Only use gcc 2.95.3 and later. +#endif +#if __GNUC__ == 2 && __GNUC_MINOR__ == 95 +/* shame we can't detect the .1 or .2 releases */ +#warning GCC 2.95.2 and earlier miscompiles kernels. +#endif + +/* Use marker if you need to separate the values later */ + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +int main(void) +{ + DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); + BLANK(); + DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); + DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); + BLANK(); + DEFINE(VM_EXEC, VM_EXEC); + BLANK(); + BLANK(); + DEFINE(PAGE_PRESENT, _PAGE_PRESENT); + DEFINE(PAGE_READONLY, _PAGE_READONLY); + DEFINE(PAGE_NOT_USER, _PAGE_NOT_USER); + DEFINE(PAGE_OLD, _PAGE_OLD); + DEFINE(PAGE_CLEAN, _PAGE_CLEAN); + BLANK(); + DEFINE(PAGE_SZ, PAGE_SIZE); + BLANK(); + DEFINE(SYS_ERROR0, 0x9f0000); + return 0; +} diff --git a/arch/arm26/kernel/calls.S b/arch/arm26/kernel/calls.S new file mode 100644 index 000000000000..e3d276827c84 --- /dev/null +++ b/arch/arm26/kernel/calls.S @@ -0,0 +1,265 @@ +/* + * linux/arch/arm26/kernel/calls.S + * + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * FIXME + * This file is included twice in entry.S which may not be necessary + */ + +//FIXME - clearly NR_syscalls is never defined here + +#ifndef NR_syscalls +#define NR_syscalls 256 +#else + +__syscall_start: +/* 0 */ .long sys_ni_syscall + .long sys_exit + .long sys_fork_wrapper + .long sys_read + .long sys_write +/* 5 */ .long sys_open + .long sys_close + .long sys_ni_syscall /* was sys_waitpid */ + .long sys_creat + .long sys_link +/* 10 */ .long sys_unlink + .long sys_execve_wrapper + .long sys_chdir + .long sys_time /* used by libc4 */ + .long sys_mknod +/* 15 */ .long sys_chmod + .long sys_lchown16 + .long sys_ni_syscall /* was sys_break */ + .long sys_ni_syscall /* was sys_stat */ + .long sys_lseek +/* 20 */ .long sys_getpid + .long sys_mount + .long sys_oldumount /* used by libc4 */ + .long sys_setuid16 + .long sys_getuid16 +/* 25 */ .long sys_stime + .long sys_ptrace + .long sys_alarm /* used by libc4 */ + .long sys_ni_syscall /* was sys_fstat */ + .long sys_pause +/* 30 */ .long sys_utime /* used by libc4 */ + .long sys_ni_syscall /* was sys_stty */ + .long sys_ni_syscall /* was sys_getty */ + .long sys_access + .long sys_nice +/* 35 */ .long sys_ni_syscall /* was sys_ftime */ + .long sys_sync + .long sys_kill + .long sys_rename + .long sys_mkdir +/* 40 */ .long sys_rmdir + .long sys_dup + .long sys_pipe + .long sys_times + .long sys_ni_syscall /* was sys_prof */ +/* 45 */ .long sys_brk + .long sys_setgid16 + .long sys_getgid16 + .long sys_ni_syscall /* was sys_signal */ + .long sys_geteuid16 +/* 50 */ .long sys_getegid16 + .long sys_acct + .long sys_umount + .long sys_ni_syscall /* was sys_lock */ + .long sys_ioctl +/* 55 */ .long sys_fcntl + .long sys_ni_syscall /* was sys_mpx */ + .long sys_setpgid + .long sys_ni_syscall /* was sys_ulimit */ + .long sys_ni_syscall /* was sys_olduname */ +/* 60 */ .long sys_umask + .long sys_chroot + .long sys_ustat + .long sys_dup2 + .long sys_getppid +/* 65 */ .long sys_getpgrp + .long sys_setsid + .long sys_sigaction + .long sys_ni_syscall /* was sys_sgetmask */ + .long sys_ni_syscall /* was sys_ssetmask */ +/* 70 */ .long sys_setreuid16 + .long sys_setregid16 + .long sys_sigsuspend_wrapper + .long sys_sigpending + .long sys_sethostname +/* 75 */ .long sys_setrlimit + .long sys_old_getrlimit /* used by libc4 */ + .long sys_getrusage + .long sys_gettimeofday + .long sys_settimeofday +/* 80 */ .long sys_getgroups16 + .long sys_setgroups16 + .long old_select /* used by libc4 */ + .long sys_symlink + .long sys_ni_syscall /* was sys_lstat */ +/* 85 */ .long sys_readlink + .long sys_uselib + .long sys_swapon + .long sys_reboot + .long old_readdir /* used by libc4 */ +/* 90 */ .long old_mmap /* used by libc4 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod +/* 95 */ .long sys_fchown16 + .long sys_getpriority + .long sys_setpriority + .long sys_ni_syscall /* was sys_profil */ + .long sys_statfs +/* 100 */ .long sys_fstatfs + .long sys_ni_syscall + .long sys_socketcall + .long sys_syslog + .long sys_setitimer +/* 105 */ .long sys_getitimer + .long sys_newstat + .long sys_newlstat + .long sys_newfstat + .long sys_ni_syscall /* was sys_uname */ +/* 110 */ .long sys_ni_syscall /* was sys_iopl */ + .long sys_vhangup + .long sys_ni_syscall + .long sys_syscall /* call a syscall */ + .long sys_wait4 +/* 115 */ .long sys_swapoff + .long sys_sysinfo + .long sys_ipc + .long sys_fsync + .long sys_sigreturn_wrapper +/* 120 */ .long sys_clone_wapper + .long sys_setdomainname + .long sys_newuname + .long sys_ni_syscall + .long sys_adjtimex +/* 125 */ .long sys_mprotect + .long sys_sigprocmask + .long sys_ni_syscall /* WAS: sys_create_module */ + .long sys_init_module + .long sys_delete_module +/* 130 */ .long sys_ni_syscall /* WAS: sys_get_kernel_syms */ + .long sys_quotactl + .long sys_getpgid + .long sys_fchdir + .long sys_bdflush +/* 135 */ .long sys_sysfs + .long sys_personality + .long sys_ni_syscall /* .long _sys_afs_syscall */ + .long sys_setfsuid16 + .long sys_setfsgid16 +/* 140 */ .long sys_llseek + .long sys_getdents + .long sys_select + .long sys_flock + .long sys_msync +/* 145 */ .long sys_readv + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl +/* 150 */ .long sys_mlock + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam +/* 155 */ .long sys_sched_getparam + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max +/* 160 */ .long sys_sched_get_priority_min + .long sys_sched_rr_get_interval + .long sys_nanosleep + .long sys_arm_mremap + .long sys_setresuid16 +/* 165 */ .long sys_getresuid16 + .long sys_ni_syscall + .long sys_ni_syscall /* WAS: sys_query_module */ + .long sys_poll + .long sys_nfsservctl +/* 170 */ .long sys_setresgid16 + .long sys_getresgid16 + .long sys_prctl + .long sys_rt_sigreturn_wrapper + .long sys_rt_sigaction +/* 175 */ .long sys_rt_sigprocmask + .long sys_rt_sigpending + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long sys_rt_sigsuspend_wrapper +/* 180 */ .long sys_pread64 + .long sys_pwrite64 + .long sys_chown16 + .long sys_getcwd + .long sys_capget +/* 185 */ .long sys_capset + .long sys_sigaltstack_wrapper + .long sys_sendfile + .long sys_ni_syscall + .long sys_ni_syscall +/* 190 */ .long sys_vfork_wrapper + .long sys_getrlimit + .long sys_mmap2 + .long sys_truncate64 + .long sys_ftruncate64 +/* 195 */ .long sys_stat64 + .long sys_lstat64 + .long sys_fstat64 + .long sys_lchown + .long sys_getuid +/* 200 */ .long sys_getgid + .long sys_geteuid + .long sys_getegid + .long sys_setreuid + .long sys_setregid +/* 205 */ .long sys_getgroups + .long sys_setgroups + .long sys_fchown + .long sys_setresuid + .long sys_getresuid +/* 210 */ .long sys_setresgid + .long sys_getresgid + .long sys_chown + .long sys_setuid + .long sys_setgid +/* 215 */ .long sys_setfsuid + .long sys_setfsgid + .long sys_getdents64 + .long sys_pivot_root + .long sys_mincore +/* 220 */ .long sys_madvise + .long sys_fcntl64 + .long sys_ni_syscall /* TUX */ + .long sys_ni_syscall /* WAS: sys_security */ + .long sys_gettid +/* 225 */ .long sys_readahead + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr + .long sys_getxattr +/* 230 */ .long sys_lgetxattr + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr + .long sys_flistxattr +/* 235 */ .long sys_removexattr + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill +__syscall_end: + + .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 + .long sys_ni_syscall + .endr +#endif diff --git a/arch/arm26/kernel/compat.c b/arch/arm26/kernel/compat.c new file mode 100644 index 000000000000..db0310db8998 --- /dev/null +++ b/arch/arm26/kernel/compat.c @@ -0,0 +1,174 @@ +/* + * linux/arch/arm26/kernel/compat.c + * + * Copyright (C) 2001 Russell King + * 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * We keep the old params compatibility cruft in one place (here) + * so we don't end up with lots of mess around other places. + * + * NOTE: + * The old struct param_struct is deprecated, but it will be kept in + * the kernel for 5 years from now (2001). This will allow boot loaders + * to convert to the new struct tag way. + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> + +#include <asm/setup.h> +#include <asm/mach-types.h> +#include <asm/page.h> + +//#include <asm/arch.h> +//#include <asm/mach/irq.h> + +/* + * Usage: + * - do not go blindly adding fields, add them at the end + * - when adding fields, don't rely on the address until + * a patch from me has been released + * - unused fields should be zero (for future expansion) + * - this structure is relatively short-lived - only + * guaranteed to contain useful data in setup_arch() + * + * This is the old deprecated way to pass parameters to the kernel + */ +struct param_struct { + union { + struct { + unsigned long page_size; /* 0 */ + unsigned long nr_pages; /* 4 */ + unsigned long ramdisk_size; /* 8 */ + unsigned long flags; /* 12 */ +#define FLAG_READONLY 1 +#define FLAG_RDLOAD 4 +#define FLAG_RDPROMPT 8 + unsigned long rootdev; /* 16 */ + unsigned long video_num_cols; /* 20 */ + unsigned long video_num_rows; /* 24 */ + unsigned long video_x; /* 28 */ + unsigned long video_y; /* 32 */ + unsigned long memc_control_reg; /* 36 */ + unsigned char sounddefault; /* 40 */ + unsigned char adfsdrives; /* 41 */ + unsigned char bytes_per_char_h; /* 42 */ + unsigned char bytes_per_char_v; /* 43 */ + unsigned long pages_in_bank[4]; /* 44 */ + unsigned long pages_in_vram; /* 60 */ + unsigned long initrd_start; /* 64 */ + unsigned long initrd_size; /* 68 */ + unsigned long rd_start; /* 72 */ + unsigned long system_rev; /* 76 */ + unsigned long system_serial_low; /* 80 */ + unsigned long system_serial_high; /* 84 */ + unsigned long mem_fclk_21285; /* 88 */ + } s; + char unused[256]; + } u1; + union { + char paths[8][128]; + struct { + unsigned long magic; + char n[1024 - sizeof(unsigned long)]; + } s; + } u2; + char commandline[COMMAND_LINE_SIZE]; +}; + +static struct tag * __init memtag(struct tag *tag, unsigned long start, unsigned long size) +{ + tag = tag_next(tag); + tag->hdr.tag = ATAG_MEM; + tag->hdr.size = tag_size(tag_mem32); + tag->u.mem.size = size; + tag->u.mem.start = start; + + return tag; +} + +static void __init build_tag_list(struct param_struct *params, void *taglist) +{ + struct tag *tag = taglist; + + if (params->u1.s.page_size != PAGE_SIZE) { + printk(KERN_WARNING "Warning: bad configuration page, " + "trying to continue\n"); + return; + } + + printk(KERN_DEBUG "Converting old-style param struct to taglist\n"); + + tag->hdr.tag = ATAG_CORE; + tag->hdr.size = tag_size(tag_core); + tag->u.core.flags = params->u1.s.flags & FLAG_READONLY; + tag->u.core.pagesize = params->u1.s.page_size; + tag->u.core.rootdev = params->u1.s.rootdev; + + tag = tag_next(tag); + tag->hdr.tag = ATAG_RAMDISK; + tag->hdr.size = tag_size(tag_ramdisk); + tag->u.ramdisk.flags = (params->u1.s.flags & FLAG_RDLOAD ? 1 : 0) | + (params->u1.s.flags & FLAG_RDPROMPT ? 2 : 0); + tag->u.ramdisk.size = params->u1.s.ramdisk_size; + tag->u.ramdisk.start = params->u1.s.rd_start; + + tag = tag_next(tag); + tag->hdr.tag = ATAG_INITRD; + tag->hdr.size = tag_size(tag_initrd); + tag->u.initrd.start = params->u1.s.initrd_start; + tag->u.initrd.size = params->u1.s.initrd_size; + + tag = tag_next(tag); + tag->hdr.tag = ATAG_SERIAL; + tag->hdr.size = tag_size(tag_serialnr); + tag->u.serialnr.low = params->u1.s.system_serial_low; + tag->u.serialnr.high = params->u1.s.system_serial_high; + + tag = tag_next(tag); + tag->hdr.tag = ATAG_REVISION; + tag->hdr.size = tag_size(tag_revision); + tag->u.revision.rev = params->u1.s.system_rev; + + tag = memtag(tag, PHYS_OFFSET, params->u1.s.nr_pages * PAGE_SIZE); + + tag = tag_next(tag); + tag->hdr.tag = ATAG_ACORN; + tag->hdr.size = tag_size(tag_acorn); + tag->u.acorn.memc_control_reg = params->u1.s.memc_control_reg; + tag->u.acorn.vram_pages = params->u1.s.pages_in_vram; + tag->u.acorn.sounddefault = params->u1.s.sounddefault; + tag->u.acorn.adfsdrives = params->u1.s.adfsdrives; + + tag = tag_next(tag); + tag->hdr.tag = ATAG_CMDLINE; + tag->hdr.size = (strlen(params->commandline) + 3 + + sizeof(struct tag_header)) >> 2; + strcpy(tag->u.cmdline.cmdline, params->commandline); + + tag = tag_next(tag); + tag->hdr.tag = ATAG_NONE; + tag->hdr.size = 0; + + memmove(params, taglist, ((int)tag) - ((int)taglist) + + sizeof(struct tag_header)); +} + +void __init convert_to_tag_list(struct tag *tags) +{ + struct param_struct *params = (struct param_struct *)tags; + build_tag_list(params, ¶ms->u2); +} + +void __init squash_mem_tags(struct tag *tag) +{ + for (; tag->hdr.size; tag = tag_next(tag)) + if (tag->hdr.tag == ATAG_MEM) + tag->hdr.tag = ATAG_NONE; +} diff --git a/arch/arm26/kernel/dma.c b/arch/arm26/kernel/dma.c new file mode 100644 index 000000000000..80b5a774d905 --- /dev/null +++ b/arch/arm26/kernel/dma.c @@ -0,0 +1,273 @@ +/* + * linux/arch/arm26/kernel/dma.c + * + * Copyright (C) 1995-2000 Russell King + * 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Front-end to the DMA handling. This handles the allocation/freeing + * of DMA channels, and provides a unified interface to the machines + * DMA facilities. + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mman.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/errno.h> + +#include <asm/dma.h> + +DEFINE_SPINLOCK(dma_spin_lock); + +static dma_t dma_chan[MAX_DMA_CHANNELS]; + +/* + * Get dma list for /proc/dma + */ +int get_dma_list(char *buf) +{ + dma_t *dma; + char *p = buf; + int i; + + for (i = 0, dma = dma_chan; i < MAX_DMA_CHANNELS; i++, dma++) + if (dma->lock) + p += sprintf(p, "%2d: %14s %s\n", i, + dma->d_ops->type, dma->device_id); + + return p - buf; +} + +/* + * Request DMA channel + * + * On certain platforms, we have to allocate an interrupt as well... + */ +int request_dma(dmach_t channel, const char *device_id) +{ + dma_t *dma = dma_chan + channel; + int ret; + + if (channel >= MAX_DMA_CHANNELS || !dma->d_ops) + goto bad_dma; + + if (xchg(&dma->lock, 1) != 0) + goto busy; + + dma->device_id = device_id; + dma->active = 0; + dma->invalid = 1; + + ret = 0; + if (dma->d_ops->request) + ret = dma->d_ops->request(channel, dma); + + if (ret) + xchg(&dma->lock, 0); + + return ret; + +bad_dma: + printk(KERN_ERR "dma: trying to allocate DMA%d\n", channel); + return -EINVAL; + +busy: + return -EBUSY; +} + +/* + * Free DMA channel + * + * On certain platforms, we have to free interrupt as well... + */ +void free_dma(dmach_t channel) +{ + dma_t *dma = dma_chan + channel; + + if (channel >= MAX_DMA_CHANNELS || !dma->d_ops) + goto bad_dma; + + if (dma->active) { + printk(KERN_ERR "dma%d: freeing active DMA\n", channel); + dma->d_ops->disable(channel, dma); + dma->active = 0; + } + + if (xchg(&dma->lock, 0) != 0) { + if (dma->d_ops->free) + dma->d_ops->free(channel, dma); + return; + } + + printk(KERN_ERR "dma%d: trying to free free DMA\n", channel); + return; + +bad_dma: + printk(KERN_ERR "dma: trying to free DMA%d\n", channel); +} + +/* Set DMA Scatter-Gather list + */ +void set_dma_sg (dmach_t channel, struct scatterlist *sg, int nr_sg) +{ + dma_t *dma = dma_chan + channel; + + if (dma->active) + printk(KERN_ERR "dma%d: altering DMA SG while " + "DMA active\n", channel); + + dma->sg = sg; + dma->sgcount = nr_sg; + dma->using_sg = 1; + dma->invalid = 1; +} + +/* Set DMA address + * + * Copy address to the structure, and set the invalid bit + */ +void set_dma_addr (dmach_t channel, unsigned long physaddr) +{ + dma_t *dma = dma_chan + channel; + + if (dma->active) + printk(KERN_ERR "dma%d: altering DMA address while " + "DMA active\n", channel); + + dma->sg = &dma->buf; + dma->sgcount = 1; + dma->buf.__address = (char *)physaddr;//FIXME - not pretty + dma->using_sg = 0; + dma->invalid = 1; +} + +/* Set DMA byte count + * + * Copy address to the structure, and set the invalid bit + */ +void set_dma_count (dmach_t channel, unsigned long count) +{ + dma_t *dma = dma_chan + channel; + + if (dma->active) + printk(KERN_ERR "dma%d: altering DMA count while " + "DMA active\n", channel); + + dma->sg = &dma->buf; + dma->sgcount = 1; + dma->buf.length = count; + dma->using_sg = 0; + dma->invalid = 1; +} + +/* Set DMA direction mode + */ +void set_dma_mode (dmach_t channel, dmamode_t mode) +{ + dma_t *dma = dma_chan + channel; + + if (dma->active) + printk(KERN_ERR "dma%d: altering DMA mode while " + "DMA active\n", channel); + + dma->dma_mode = mode; + dma->invalid = 1; +} + +/* Enable DMA channel + */ +void enable_dma (dmach_t channel) +{ + dma_t *dma = dma_chan + channel; + + if (!dma->lock) + goto free_dma; + + if (dma->active == 0) { + dma->active = 1; + dma->d_ops->enable(channel, dma); + } + return; + +free_dma: + printk(KERN_ERR "dma%d: trying to enable free DMA\n", channel); + BUG(); +} + +/* Disable DMA channel + */ +void disable_dma (dmach_t channel) +{ + dma_t *dma = dma_chan + channel; + + if (!dma->lock) + goto free_dma; + + if (dma->active == 1) { + dma->active = 0; + dma->d_ops->disable(channel, dma); + } + return; + +free_dma: + printk(KERN_ERR "dma%d: trying to disable free DMA\n", channel); + BUG(); +} + +/* + * Is the specified DMA channel active? + */ +int dma_channel_active(dmach_t channel) +{ + return dma_chan[channel].active; +} + +void set_dma_page(dmach_t channel, char pagenr) +{ + printk(KERN_ERR "dma%d: trying to set_dma_page\n", channel); +} + +void set_dma_speed(dmach_t channel, int cycle_ns) +{ + dma_t *dma = dma_chan + channel; + int ret = 0; + + if (dma->d_ops->setspeed) + ret = dma->d_ops->setspeed(channel, dma, cycle_ns); + dma->speed = ret; +} + +int get_dma_residue(dmach_t channel) +{ + dma_t *dma = dma_chan + channel; + int ret = 0; + + if (dma->d_ops->residue) + ret = dma->d_ops->residue(channel, dma); + + return ret; +} + +void __init init_dma(void) +{ + arch_dma_init(dma_chan); +} + +EXPORT_SYMBOL(request_dma); +EXPORT_SYMBOL(free_dma); +EXPORT_SYMBOL(enable_dma); +EXPORT_SYMBOL(disable_dma); +EXPORT_SYMBOL(set_dma_addr); +EXPORT_SYMBOL(set_dma_count); +EXPORT_SYMBOL(set_dma_mode); +EXPORT_SYMBOL(set_dma_page); +EXPORT_SYMBOL(get_dma_residue); +EXPORT_SYMBOL(set_dma_sg); +EXPORT_SYMBOL(set_dma_speed); + +EXPORT_SYMBOL(dma_spin_lock); diff --git a/arch/arm26/kernel/ecard.c b/arch/arm26/kernel/ecard.c new file mode 100644 index 000000000000..824c6b571ad9 --- /dev/null +++ b/arch/arm26/kernel/ecard.c @@ -0,0 +1,850 @@ +/* + * linux/arch/arm26/kernel/ecard.c + * + * Copyright 1995-2001 Russell King + * Copyright 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Find all installed expansion cards, and handle interrupts from them. + * + * Created from information from Acorns RiscOS3 PRMs + * 15-Jun-2003 IM Modified from ARM32 (RiscPC capable) version + * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. + * 06-May-1997 RMK Added blacklist for cards whose loader doesn't work. + * 12-Sep-1997 RMK Created new handling of interrupt enables/disables + * - cards can now register their own routine to control + * interrupts (recommended). + * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled + * on reset from Linux. (Caused cards not to respond + * under RiscOS without hard reset). + * + */ +#define ECARD_C + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/reboot.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/device.h> +#include <linux/init.h> + +#include <asm/dma.h> +#include <asm/ecard.h> +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/irqchip.h> +#include <asm/tlbflush.h> + +enum req { + req_readbytes, + req_reset +}; + +struct ecard_request { + enum req req; + ecard_t *ec; + unsigned int address; + unsigned int length; + unsigned int use_loader; + void *buffer; +}; + +struct expcard_blacklist { + unsigned short manufacturer; + unsigned short product; + const char *type; +}; + +static ecard_t *cards; +static ecard_t *slot_to_expcard[MAX_ECARDS]; +static unsigned int ectcr; + +/* List of descriptions of cards which don't have an extended + * identification, or chunk directories containing a description. + */ +static struct expcard_blacklist __initdata blacklist[] = { + { MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" } +}; + +asmlinkage extern int +ecard_loader_reset(volatile unsigned char *pa, loader_t loader); +asmlinkage extern int +ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader); + +static const struct ecard_id * +ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec); + +static inline unsigned short +ecard_getu16(unsigned char *v) +{ + return v[0] | v[1] << 8; +} + +static inline signed long +ecard_gets24(unsigned char *v) +{ + return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0); +} + +static inline ecard_t * +slot_to_ecard(unsigned int slot) +{ + return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL; +} + +/* ===================== Expansion card daemon ======================== */ +/* + * Since the loader programs on the expansion cards need to be run + * in a specific environment, create a separate task with this + * environment up, and pass requests to this task as and when we + * need to. + * + * This should allow 99% of loaders to be called from Linux. + * + * From a security standpoint, we trust the card vendors. This + * may be a misplaced trust. + */ +#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE) +#define POD_INT_ADDR(x) ((volatile unsigned char *)\ + ((BUS_ADDR((x)) - IO_BASE) + IO_START)) + +static inline void ecard_task_reset(struct ecard_request *req) +{ + struct expansion_card *ec = req->ec; + if (ec->loader) + ecard_loader_reset(POD_INT_ADDR(ec->podaddr), ec->loader); +} + +static void +ecard_task_readbytes(struct ecard_request *req) +{ + unsigned char *buf = (unsigned char *)req->buffer; + volatile unsigned char *base_addr = + (volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr); + unsigned int len = req->length; + unsigned int off = req->address; + + if (!req->use_loader || !req->ec->loader) { + off *= 4; + while (len--) { + *buf++ = base_addr[off]; + off += 4; + } + } else { + while(len--) { + /* + * The following is required by some + * expansion card loader programs. + */ + *(unsigned long *)0x108 = 0; + *buf++ = ecard_loader_read(off++, base_addr, + req->ec->loader); + } + } +} + +static void ecard_do_request(struct ecard_request *req) +{ + switch (req->req) { + case req_readbytes: + ecard_task_readbytes(req); + break; + + case req_reset: + ecard_task_reset(req); + break; + } +} + +/* + * On 26-bit processors, we don't need the kcardd thread to access the + * expansion card loaders. We do it directly. + */ +#define ecard_call(req) ecard_do_request(req) + +/* ======================= Mid-level card control ===================== */ + +static void +ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) +{ + struct ecard_request req; + + req.req = req_readbytes; + req.ec = ec; + req.address = off; + req.length = len; + req.use_loader = useld; + req.buffer = addr; + + ecard_call(&req); +} + +int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) +{ + struct ex_chunk_dir excd; + int index = 16; + int useld = 0; + + if (!ec->cid.cd) + return 0; + + while(1) { + ecard_readbytes(&excd, ec, index, 8, useld); + index += 8; + if (c_id(&excd) == 0) { + if (!useld && ec->loader) { + useld = 1; + index = 0; + continue; + } + return 0; + } + if (c_id(&excd) == 0xf0) { /* link */ + index = c_start(&excd); + continue; + } + if (c_id(&excd) == 0x80) { /* loader */ + if (!ec->loader) { + ec->loader = (loader_t)kmalloc(c_len(&excd), + GFP_KERNEL); + if (ec->loader) + ecard_readbytes(ec->loader, ec, + (int)c_start(&excd), + c_len(&excd), useld); + else + return 0; + } + continue; + } + if (c_id(&excd) == id && num-- == 0) + break; + } + + if (c_id(&excd) & 0x80) { + switch (c_id(&excd) & 0x70) { + case 0x70: + ecard_readbytes((unsigned char *)excd.d.string, ec, + (int)c_start(&excd), c_len(&excd), + useld); + break; + case 0x00: + break; + } + } + cd->start_offset = c_start(&excd); + memcpy(cd->d.string, excd.d.string, 256); + return 1; +} + +/* ======================= Interrupt control ============================ */ + +static void ecard_def_irq_enable(ecard_t *ec, int irqnr) +{ +} + +static void ecard_def_irq_disable(ecard_t *ec, int irqnr) +{ +} + +static int ecard_def_irq_pending(ecard_t *ec) +{ + return !ec->irqmask || ec->irqaddr[0] & ec->irqmask; +} + +static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr) +{ + panic("ecard_def_fiq_enable called - impossible"); +} + +static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr) +{ + panic("ecard_def_fiq_disable called - impossible"); +} + +static int ecard_def_fiq_pending(ecard_t *ec) +{ + return !ec->fiqmask || ec->fiqaddr[0] & ec->fiqmask; +} + +static expansioncard_ops_t ecard_default_ops = { + ecard_def_irq_enable, + ecard_def_irq_disable, + ecard_def_irq_pending, + ecard_def_fiq_enable, + ecard_def_fiq_disable, + ecard_def_fiq_pending +}; + +/* + * Enable and disable interrupts from expansion cards. + * (interrupts are disabled for these functions). + * + * They are not meant to be called directly, but via enable/disable_irq. + */ +static void ecard_irq_unmask(unsigned int irqnr) +{ + ecard_t *ec = slot_to_ecard(irqnr - 32); + + if (ec) { + if (!ec->ops) + ec->ops = &ecard_default_ops; + + if (ec->claimed && ec->ops->irqenable) + ec->ops->irqenable(ec, irqnr); + else + printk(KERN_ERR "ecard: rejecting request to " + "enable IRQs for %d\n", irqnr); + } +} + +static void ecard_irq_mask(unsigned int irqnr) +{ + ecard_t *ec = slot_to_ecard(irqnr - 32); + + if (ec) { + if (!ec->ops) + ec->ops = &ecard_default_ops; + + if (ec->ops && ec->ops->irqdisable) + ec->ops->irqdisable(ec, irqnr); + } +} + +static struct irqchip ecard_chip = { + .ack = ecard_irq_mask, + .mask = ecard_irq_mask, + .unmask = ecard_irq_unmask, +}; + +void ecard_enablefiq(unsigned int fiqnr) +{ + ecard_t *ec = slot_to_ecard(fiqnr); + + if (ec) { + if (!ec->ops) + ec->ops = &ecard_default_ops; + + if (ec->claimed && ec->ops->fiqenable) + ec->ops->fiqenable(ec, fiqnr); + else + printk(KERN_ERR "ecard: rejecting request to " + "enable FIQs for %d\n", fiqnr); + } +} + +void ecard_disablefiq(unsigned int fiqnr) +{ + ecard_t *ec = slot_to_ecard(fiqnr); + + if (ec) { + if (!ec->ops) + ec->ops = &ecard_default_ops; + + if (ec->ops->fiqdisable) + ec->ops->fiqdisable(ec, fiqnr); + } +} + +static void +ecard_dump_irq_state(ecard_t *ec) +{ + printk(" %d: %sclaimed, ", + ec->slot_no, + ec->claimed ? "" : "not "); + + if (ec->ops && ec->ops->irqpending && + ec->ops != &ecard_default_ops) + printk("irq %spending\n", + ec->ops->irqpending(ec) ? "" : "not "); + else + printk("irqaddr %p, mask = %02X, status = %02X\n", + ec->irqaddr, ec->irqmask, *ec->irqaddr); +} + +static void ecard_check_lockup(struct irqdesc *desc) +{ + static int last, lockup; + ecard_t *ec; + + /* + * If the timer interrupt has not run since the last million + * unrecognised expansion card interrupts, then there is + * something seriously wrong. Disable the expansion card + * interrupts so at least we can continue. + * + * Maybe we ought to start a timer to re-enable them some time + * later? + */ + if (last == jiffies) { + lockup += 1; + if (lockup > 1000000) { + printk(KERN_ERR "\nInterrupt lockup detected - " + "disabling all expansion card interrupts\n"); + + desc->chip->mask(IRQ_EXPANSIONCARD); + + printk("Expansion card IRQ state:\n"); + + for (ec = cards; ec; ec = ec->next) + ecard_dump_irq_state(ec); + } + } else + lockup = 0; + + /* + * If we did not recognise the source of this interrupt, + * warn the user, but don't flood the user with these messages. + */ + if (!last || time_after(jiffies, (unsigned long)(last + 5*HZ))) { + last = jiffies; + printk(KERN_WARNING "Unrecognised interrupt from backplane\n"); + } +} + +static void +ecard_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +{ + ecard_t *ec; + int called = 0; + + desc->chip->mask(irq); + for (ec = cards; ec; ec = ec->next) { + int pending; + + if (!ec->claimed || ec->irq == NO_IRQ) + continue; + + if (ec->ops && ec->ops->irqpending) + pending = ec->ops->irqpending(ec); + else + pending = ecard_default_ops.irqpending(ec); + + if (pending) { + struct irqdesc *d = irq_desc + ec->irq; + d->handle(ec->irq, d, regs); + called ++; + } + } + desc->chip->unmask(irq); + + if (called == 0) + ecard_check_lockup(desc); +} + +#define ecard_irqexp_handler NULL +#define ecard_probeirqhw() (0) + +unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) +{ + unsigned long address = 0; + int slot = ec->slot_no; + + ectcr &= ~(1 << slot); + + switch (type) { + case ECARD_MEMC: + address = IO_EC_MEMC_BASE + (slot << 12); + break; + + case ECARD_IOC: + address = IO_EC_IOC_BASE + (slot << 12) + (speed << 17); + break; + + default: + break; + } + + return address; +} + +static int ecard_prints(char *buffer, ecard_t *ec) +{ + char *start = buffer; + + buffer += sprintf(buffer, " %d: ", ec->slot_no); + + if (ec->cid.id == 0) { + struct in_chunk_dir incd; + + buffer += sprintf(buffer, "[%04X:%04X] ", + ec->cid.manufacturer, ec->cid.product); + + if (!ec->card_desc && ec->cid.cd && + ecard_readchunk(&incd, ec, 0xf5, 0)) { + ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL); + + if (ec->card_desc) + strcpy((char *)ec->card_desc, incd.d.string); + } + + buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); + } else + buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id); + + return buffer - start; +} + +static int get_ecard_dev_info(char *buf, char **start, off_t pos, int count) +{ + ecard_t *ec = cards; + off_t at = 0; + int len, cnt; + + cnt = 0; + while (ec && count > cnt) { + len = ecard_prints(buf, ec); + at += len; + if (at >= pos) { + if (!*start) { + *start = buf + (pos - (at - len)); + cnt = at - pos; + } else + cnt += len; + buf += len; + } + ec = ec->next; + } + return (count > cnt) ? cnt : count; +} + +static struct proc_dir_entry *proc_bus_ecard_dir = NULL; + +static void ecard_proc_init(void) +{ + proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus); + create_proc_info_entry("devices", 0, proc_bus_ecard_dir, + get_ecard_dev_info); +} + +#define ec_set_resource(ec,nr,st,sz,flg) \ + do { \ + (ec)->resource[nr].name = ec->dev.bus_id; \ + (ec)->resource[nr].start = st; \ + (ec)->resource[nr].end = (st) + (sz) - 1; \ + (ec)->resource[nr].flags = flg; \ + } while (0) + +static void __init ecard_init_resources(struct expansion_card *ec) +{ + unsigned long base = PODSLOT_IOC0_BASE; + unsigned int slot = ec->slot_no; + int i; + + ec_set_resource(ec, ECARD_RES_MEMC, + PODSLOT_MEMC_BASE + (slot << 14), + PODSLOT_MEMC_SIZE, IORESOURCE_MEM); + + for (i = 0; i < ECARD_RES_IOCSYNC - ECARD_RES_IOCSLOW; i++) { + ec_set_resource(ec, i + ECARD_RES_IOCSLOW, + base + (slot << 14) + (i << 19), + PODSLOT_IOC_SIZE, IORESOURCE_MEM); + } + + for (i = 0; i < ECARD_NUM_RESOURCES; i++) { + if (ec->resource[i].start && + request_resource(&iomem_resource, &ec->resource[i])) { + printk(KERN_ERR "%s: resource(s) not available\n", + ec->dev.bus_id); + ec->resource[i].end -= ec->resource[i].start; + ec->resource[i].start = 0; + } + } +} + +static ssize_t ecard_show_irq(struct device *dev, char *buf) +{ + struct expansion_card *ec = ECARD_DEV(dev); + return sprintf(buf, "%u\n", ec->irq); +} + +static ssize_t ecard_show_vendor(struct device *dev, char *buf) +{ + struct expansion_card *ec = ECARD_DEV(dev); + return sprintf(buf, "%u\n", ec->cid.manufacturer); +} + +static ssize_t ecard_show_device(struct device *dev, char *buf) +{ + struct expansion_card *ec = ECARD_DEV(dev); + return sprintf(buf, "%u\n", ec->cid.product); +} + +static ssize_t ecard_show_dma(struct device *dev, char *buf) +{ + struct expansion_card *ec = ECARD_DEV(dev); + return sprintf(buf, "%u\n", ec->dma); +} + +static ssize_t ecard_show_resources(struct device *dev, char *buf) +{ + struct expansion_card *ec = ECARD_DEV(dev); + char *str = buf; + int i; + + for (i = 0; i < ECARD_NUM_RESOURCES; i++) + str += sprintf(str, "%08lx %08lx %08lx\n", + ec->resource[i].start, + ec->resource[i].end, + ec->resource[i].flags); + + return str - buf; +} + +static DEVICE_ATTR(irq, S_IRUGO, ecard_show_irq, NULL); +static DEVICE_ATTR(vendor, S_IRUGO, ecard_show_vendor, NULL); +static DEVICE_ATTR(device, S_IRUGO, ecard_show_device, NULL); +static DEVICE_ATTR(dma, S_IRUGO, ecard_show_dma, NULL); +static DEVICE_ATTR(resource, S_IRUGO, ecard_show_resources, NULL); + +/* + * Probe for an expansion card. + * + * If bit 1 of the first byte of the card is set, then the + * card does not exist. + */ +static int __init +ecard_probe(int slot, card_type_t type) +{ + ecard_t **ecp; + ecard_t *ec; + struct ex_ecid cid; + int i, rc = -ENOMEM; + + ec = kmalloc(sizeof(ecard_t), GFP_KERNEL); + if (!ec) + goto nomem; + + memset(ec, 0, sizeof(ecard_t)); + + ec->slot_no = slot; + ec->type = type; + ec->irq = NO_IRQ; + ec->fiq = NO_IRQ; + ec->dma = NO_DMA; + ec->card_desc = NULL; + ec->ops = &ecard_default_ops; + + rc = -ENODEV; + if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0) + goto nodev; + + cid.r_zero = 1; + ecard_readbytes(&cid, ec, 0, 16, 0); + if (cid.r_zero) + goto nodev; + + ec->cid.id = cid.r_id; + ec->cid.cd = cid.r_cd; + ec->cid.is = cid.r_is; + ec->cid.w = cid.r_w; + ec->cid.manufacturer = ecard_getu16(cid.r_manu); + ec->cid.product = ecard_getu16(cid.r_prod); + ec->cid.country = cid.r_country; + ec->cid.irqmask = cid.r_irqmask; + ec->cid.irqoff = ecard_gets24(cid.r_irqoff); + ec->cid.fiqmask = cid.r_fiqmask; + ec->cid.fiqoff = ecard_gets24(cid.r_fiqoff); + ec->fiqaddr = + ec->irqaddr = (unsigned char *)ioaddr(ec->podaddr); + + if (ec->cid.is) { + ec->irqmask = ec->cid.irqmask; + ec->irqaddr += ec->cid.irqoff; + ec->fiqmask = ec->cid.fiqmask; + ec->fiqaddr += ec->cid.fiqoff; + } else { + ec->irqmask = 1; + ec->fiqmask = 4; + } + + for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++) + if (blacklist[i].manufacturer == ec->cid.manufacturer && + blacklist[i].product == ec->cid.product) { + ec->card_desc = blacklist[i].type; + break; + } + + snprintf(ec->dev.bus_id, sizeof(ec->dev.bus_id), "ecard%d", slot); + ec->dev.parent = NULL; + ec->dev.bus = &ecard_bus_type; + ec->dev.dma_mask = &ec->dma_mask; + ec->dma_mask = (u64)0xffffffff; + + ecard_init_resources(ec); + + /* + * hook the interrupt handlers + */ + ec->irq = 32 + slot; + set_irq_chip(ec->irq, &ecard_chip); + set_irq_handler(ec->irq, do_level_IRQ); + set_irq_flags(ec->irq, IRQF_VALID); + + for (ecp = &cards; *ecp; ecp = &(*ecp)->next); + + *ecp = ec; + slot_to_expcard[slot] = ec; + + device_register(&ec->dev); + device_create_file(&ec->dev, &dev_attr_dma); + device_create_file(&ec->dev, &dev_attr_irq); + device_create_file(&ec->dev, &dev_attr_resource); + device_create_file(&ec->dev, &dev_attr_vendor); + device_create_file(&ec->dev, &dev_attr_device); + + return 0; + +nodev: + kfree(ec); +nomem: + return rc; +} + +/* + * Initialise the expansion card system. + * Locate all hardware - interrupt management and + * actual cards. + */ +static int __init ecard_init(void) +{ + int slot, irqhw; + + printk("Probing expansion cards\n"); + + for (slot = 0; slot < MAX_ECARDS; slot ++) { + ecard_probe(slot, ECARD_IOC); + } + + irqhw = ecard_probeirqhw(); + + set_irq_chained_handler(IRQ_EXPANSIONCARD, + irqhw ? ecard_irqexp_handler : ecard_irq_handler); + + ecard_proc_init(); + + return 0; +} + +subsys_initcall(ecard_init); + +/* + * ECARD "bus" + */ +static const struct ecard_id * +ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec) +{ + int i; + + for (i = 0; ids[i].manufacturer != 65535; i++) + if (ec->cid.manufacturer == ids[i].manufacturer && + ec->cid.product == ids[i].product) + return ids + i; + + return NULL; +} + +static int ecard_drv_probe(struct device *dev) +{ + struct expansion_card *ec = ECARD_DEV(dev); + struct ecard_driver *drv = ECARD_DRV(dev->driver); + const struct ecard_id *id; + int ret; + + id = ecard_match_device(drv->id_table, ec); + + ecard_claim(ec); + ret = drv->probe(ec, id); + if (ret) + ecard_release(ec); + return ret; +} + +static int ecard_drv_remove(struct device *dev) +{ + struct expansion_card *ec = ECARD_DEV(dev); + struct ecard_driver *drv = ECARD_DRV(dev->driver); + + drv->remove(ec); + ecard_release(ec); + + return 0; +} + +/* + * Before rebooting, we must make sure that the expansion card is in a + * sensible state, so it can be re-detected. This means that the first + * page of the ROM must be visible. We call the expansion cards reset + * handler, if any. + */ +static void ecard_drv_shutdown(struct device *dev) +{ + struct expansion_card *ec = ECARD_DEV(dev); + struct ecard_driver *drv = ECARD_DRV(dev->driver); + struct ecard_request req; + + if (drv->shutdown) + drv->shutdown(ec); + ecard_release(ec); + req.req = req_reset; + req.ec = ec; + ecard_call(&req); +} + +int ecard_register_driver(struct ecard_driver *drv) +{ + drv->drv.bus = &ecard_bus_type; + drv->drv.probe = ecard_drv_probe; + drv->drv.remove = ecard_drv_remove; + drv->drv.shutdown = ecard_drv_shutdown; + + return driver_register(&drv->drv); +} + +void ecard_remove_driver(struct ecard_driver *drv) +{ + driver_unregister(&drv->drv); +} + +static int ecard_match(struct device *_dev, struct device_driver *_drv) +{ + struct expansion_card *ec = ECARD_DEV(_dev); + struct ecard_driver *drv = ECARD_DRV(_drv); + int ret; + + if (drv->id_table) { + ret = ecard_match_device(drv->id_table, ec) != NULL; + } else { + ret = ec->cid.id == drv->id; + } + + return ret; +} + +struct bus_type ecard_bus_type = { + .name = "ecard", + .match = ecard_match, +}; + +static int ecard_bus_init(void) +{ + return bus_register(&ecard_bus_type); +} + +postcore_initcall(ecard_bus_init); + +EXPORT_SYMBOL(ecard_readchunk); +EXPORT_SYMBOL(ecard_address); +EXPORT_SYMBOL(ecard_register_driver); +EXPORT_SYMBOL(ecard_remove_driver); +EXPORT_SYMBOL(ecard_bus_type); diff --git a/arch/arm26/kernel/entry.S b/arch/arm26/kernel/entry.S new file mode 100644 index 000000000000..a231dd88d0e1 --- /dev/null +++ b/arch/arm26/kernel/entry.S @@ -0,0 +1,961 @@ +/* arch/arm26/kernel/entry.S + * + * Assembled from chunks of code in arch/arm + * + * Copyright (C) 2003 Ian Molton + * Based on the work of RMK. + * + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm_offsets.h> +#include <asm/errno.h> +#include <asm/hardware.h> +#include <asm/sysirq.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/ptrace.h> + + .macro zero_fp +#ifndef CONFIG_NO_FRAME_POINTER + mov fp, #0 +#endif + .endm + + .text + +@ Bad Abort numbers +@ ----------------- +@ +#define BAD_PREFETCH 0 +#define BAD_DATA 1 +#define BAD_ADDREXCPTN 2 +#define BAD_IRQ 3 +#define BAD_UNDEFINSTR 4 + +@ OS version number used in SWIs +@ RISC OS is 0 +@ RISC iX is 8 +@ +#define OS_NUMBER 9 +#define ARMSWI_OFFSET 0x000f0000 + +@ +@ Stack format (ensured by USER_* and SVC_*) +@ PSR and PC are comined on arm26 +@ + +#define S_OFF 8 + +#define S_OLD_R0 64 +#define S_PC 60 +#define S_LR 56 +#define S_SP 52 +#define S_IP 48 +#define S_FP 44 +#define S_R10 40 +#define S_R9 36 +#define S_R8 32 +#define S_R7 28 +#define S_R6 24 +#define S_R5 20 +#define S_R4 16 +#define S_R3 12 +#define S_R2 8 +#define S_R1 4 +#define S_R0 0 + + .macro save_user_regs + str r0, [sp, #-4]! @ Store SVC r0 + str lr, [sp, #-4]! @ Store user mode PC + sub sp, sp, #15*4 + stmia sp, {r0 - lr}^ @ Store the other user-mode regs + mov r0, r0 + .endm + + .macro slow_restore_user_regs + ldmia sp, {r0 - lr}^ @ restore the user regs not including PC + mov r0, r0 + ldr lr, [sp, #15*4] @ get user PC + add sp, sp, #15*4+8 @ free stack + movs pc, lr @ return + .endm + + .macro fast_restore_user_regs + add sp, sp, #S_OFF + ldmib sp, {r1 - lr}^ + mov r0, r0 + ldr lr, [sp, #15*4] + add sp, sp, #15*4+8 + movs pc, lr + .endm + + .macro save_svc_regs + str sp, [sp, #-16]! + str lr, [sp, #8] + str lr, [sp, #4] + stmfd sp!, {r0 - r12} + mov r0, #-1 + str r0, [sp, #S_OLD_R0] + zero_fp + .endm + + .macro save_svc_regs_irq + str sp, [sp, #-16]! + str lr, [sp, #4] + ldr lr, .LCirq + ldr lr, [lr] + str lr, [sp, #8] + stmfd sp!, {r0 - r12} + mov r0, #-1 + str r0, [sp, #S_OLD_R0] + zero_fp + .endm + + .macro restore_svc_regs + ldmfd sp, {r0 - pc}^ + .endm + + .macro mask_pc, rd, rm + bic \rd, \rm, #PCMASK + .endm + + .macro disable_irqs, temp + mov \temp, pc + orr \temp, \temp, #PSR_I_BIT + teqp \temp, #0 + .endm + + .macro enable_irqs, temp + mov \temp, pc + and \temp, \temp, #~PSR_I_BIT + teqp \temp, #0 + .endm + + .macro initialise_traps_extra + .endm + + .macro get_thread_info, rd + mov \rd, sp, lsr #13 + mov \rd, \rd, lsl #13 + .endm + +/* + * These are the registers used in the syscall handler, and allow us to + * have in theory up to 7 arguments to a function - r0 to r6. + * + * Note that tbl == why is intentional. + * + * We must set at least "tsk" and "why" when calling ret_with_reschedule. + */ +scno .req r7 @ syscall number +tbl .req r8 @ syscall table pointer +why .req r8 @ Linux syscall (!= 0) +tsk .req r9 @ current thread_info + +/* + * Get the system call number. + */ + .macro get_scno + mask_pc lr, lr + ldr scno, [lr, #-4] @ get SWI instruction + .endm +/* + * ----------------------------------------------------------------------- + */ + +/* + * We rely on the fact that R0 is at the bottom of the stack (due to + * slow/fast restore user regs). + */ +#if S_R0 != 0 +#error "Please fix" +#endif + +/* + * This is the fast syscall return path. We do as little as + * possible here, and this includes saving r0 back into the SVC + * stack. + */ +ret_fast_syscall: + disable_irqs r1 @ disable interrupts + ldr r1, [tsk, #TI_FLAGS] + tst r1, #_TIF_WORK_MASK + bne fast_work_pending + fast_restore_user_regs + +/* + * Ok, we need to do extra processing, enter the slow path. + */ +fast_work_pending: + str r0, [sp, #S_R0+S_OFF]! @ returned r0 +work_pending: + tst r1, #_TIF_NEED_RESCHED + bne work_resched + tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING + beq no_work_pending + mov r0, sp @ 'regs' + mov r2, why @ 'syscall' + bl do_notify_resume + disable_irqs r1 @ disable interrupts + b no_work_pending + +work_resched: + bl schedule +/* + * "slow" syscall return path. "why" tells us if this was a real syscall. + */ +ENTRY(ret_to_user) +ret_slow_syscall: + disable_irqs r1 @ disable interrupts + ldr r1, [tsk, #TI_FLAGS] + tst r1, #_TIF_WORK_MASK + bne work_pending +no_work_pending: + slow_restore_user_regs + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + get_thread_info tsk + ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing + mov why, #1 + tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? + beq ret_slow_syscall + mov r1, sp + mov r0, #1 @ trace exit [IP = 1] + bl syscall_trace + b ret_slow_syscall + +// FIXME - is this strictly necessary? +#include "calls.S" + +/*============================================================================= + * SWI handler + *----------------------------------------------------------------------------- + */ + + .align 5 +ENTRY(vector_swi) + save_user_regs + zero_fp + get_scno + +#ifdef CONFIG_ALIGNMENT_TRAP + ldr ip, __cr_alignment + ldr ip, [ip] + mcr p15, 0, ip, c1, c0 @ update control register +#endif + enable_irqs ip + + str r4, [sp, #-S_OFF]! @ push fifth arg + + get_thread_info tsk + ldr ip, [tsk, #TI_FLAGS] @ check for syscall tracing + bic scno, scno, #0xff000000 @ mask off SWI op-code + eor scno, scno, #OS_NUMBER << 20 @ check OS number + adr tbl, sys_call_table @ load syscall table pointer + tst ip, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? + bne __sys_trace + + adral lr, ret_fast_syscall @ set return address + orral lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return + cmp scno, #NR_syscalls @ check upper syscall limit + ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine + + add r1, sp, #S_OFF +2: mov why, #0 @ no longer a real syscall + cmp scno, #ARMSWI_OFFSET + eor r0, scno, #OS_NUMBER << 20 @ put OS number back + bcs arm_syscall + b sys_ni_syscall @ not private func + + /* + * This is the really slow path. We're going to be doing + * context switches, and waiting for our parent to respond. + */ +__sys_trace: + add r1, sp, #S_OFF + mov r0, #0 @ trace entry [IP = 0] + bl syscall_trace + + adral lr, __sys_trace_return @ set return address + orral lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return + add r1, sp, #S_R0 + S_OFF @ pointer to regs + cmp scno, #NR_syscalls @ check upper syscall limit + ldmccia r1, {r0 - r3} @ have to reload r0 - r3 + ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine + b 2b + +__sys_trace_return: + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 + mov r1, sp + mov r0, #1 @ trace exit [IP = 1] + bl syscall_trace + b ret_slow_syscall + + .align 5 +#ifdef CONFIG_ALIGNMENT_TRAP + .type __cr_alignment, #object +__cr_alignment: + .word cr_alignment +#endif + + .type sys_call_table, #object +ENTRY(sys_call_table) +#include "calls.S" + +/*============================================================================ + * Special system call wrappers + */ +@ r0 = syscall number +@ r5 = syscall table + .type sys_syscall, #function +sys_syscall: + eor scno, r0, #OS_NUMBER << 20 + cmp scno, #NR_syscalls @ check range + stmleia sp, {r5, r6} @ shuffle args + movle r0, r1 + movle r1, r2 + movle r2, r3 + movle r3, r4 + ldrle pc, [tbl, scno, lsl #2] + b sys_ni_syscall + +sys_fork_wrapper: + add r0, sp, #S_OFF + b sys_fork + +sys_vfork_wrapper: + add r0, sp, #S_OFF + b sys_vfork + +sys_execve_wrapper: + add r3, sp, #S_OFF + b sys_execve + +sys_clone_wapper: + add r2, sp, #S_OFF + b sys_clone + +sys_sigsuspend_wrapper: + add r3, sp, #S_OFF + b sys_sigsuspend + +sys_rt_sigsuspend_wrapper: + add r2, sp, #S_OFF + b sys_rt_sigsuspend + +sys_sigreturn_wrapper: + add r0, sp, #S_OFF + b sys_sigreturn + +sys_rt_sigreturn_wrapper: + add r0, sp, #S_OFF + b sys_rt_sigreturn + +sys_sigaltstack_wrapper: + ldr r2, [sp, #S_OFF + S_SP] + b do_sigaltstack + +/* + * Note: off_4k (r5) is always units of 4K. If we can't do the requested + * offset, we return EINVAL. FIXME - this lost some stuff from arm32 to + * ifdefs. check it out. + */ +sys_mmap2: + tst r5, #((1 << (PAGE_SHIFT - 12)) - 1) + moveq r5, r5, lsr #PAGE_SHIFT - 12 + streq r5, [sp, #4] + beq do_mmap2 + mov r0, #-EINVAL + RETINSTR(mov,pc, lr) + +/* + * Design issues: + * - We have several modes that each vector can be called from, + * each with its own set of registers. On entry to any vector, + * we *must* save the registers used in *that* mode. + * + * - This code must be as fast as possible. + * + * There are a few restrictions on the vectors: + * - the SWI vector cannot be called from *any* non-user mode + * + * - the FP emulator is *never* called from *any* non-user mode undefined + * instruction. + * + */ + + .text + + .macro handle_irq +1: mov r4, #IOC_BASE + ldrb r6, [r4, #0x24] @ get high priority first + adr r5, irq_prio_h + teq r6, #0 + ldreqb r6, [r4, #0x14] @ get low priority + adreq r5, irq_prio_l + + teq r6, #0 @ If an IRQ happened... + ldrneb r0, [r5, r6] @ get IRQ number + movne r1, sp @ get struct pt_regs + adrne lr, 1b @ Set return address to 1b + orrne lr, lr, #PSR_I_BIT | MODE_SVC26 @ (and force SVC mode) + bne asm_do_IRQ @ process IRQ (if asserted) + .endm + + +/* + * Interrupt table (incorporates priority) + */ + .macro irq_prio_table +irq_prio_l: .byte 0, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 4, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 12, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 + .endm + +#if 1 +/* + * Uncomment these if you wish to get more debugging into about data aborts. + * FIXME - I bet we can find a way to encode these and keep performance. + */ +#define FAULT_CODE_LDRSTRPOST 0x80 +#define FAULT_CODE_LDRSTRPRE 0x40 +#define FAULT_CODE_LDRSTRREG 0x20 +#define FAULT_CODE_LDMSTM 0x10 +#define FAULT_CODE_LDCSTC 0x08 +#endif +#define FAULT_CODE_PREFETCH 0x04 +#define FAULT_CODE_WRITE 0x02 +#define FAULT_CODE_FORCECOW 0x01 + +/*============================================================================= + * Undefined FIQs + *----------------------------------------------------------------------------- + */ +_unexp_fiq: ldr sp, .LCfiq + mov r12, #IOC_BASE + strb r12, [r12, #0x38] @ Disable FIQ register + teqp pc, #PSR_I_BIT | PSR_F_BIT | MODE_SVC26 + mov r0, r0 + stmfd sp!, {r0 - r3, ip, lr} + adr r0, Lfiqmsg + bl printk + ldmfd sp!, {r0 - r3, ip, lr} + teqp pc, #PSR_I_BIT | PSR_F_BIT | MODE_FIQ26 + mov r0, r0 + movs pc, lr + +Lfiqmsg: .ascii "*** Unexpected FIQ\n\0" + .align + +.LCfiq: .word __temp_fiq +.LCirq: .word __temp_irq + +/*============================================================================= + * Undefined instruction handler + *----------------------------------------------------------------------------- + * Handles floating point instructions + */ +vector_undefinstr: + tst lr, #MODE_SVC26 @ did we come from a non-user mode? + bne __und_svc @ yes - deal with it. +/* Otherwise, fall through for the user-space (common) case. */ + save_user_regs + zero_fp @ zero frame pointer + teqp pc, #PSR_I_BIT | MODE_SVC26 @ disable IRQs +.Lbug_undef: + ldr r4, .LC2 + ldr pc, [r4] @ Call FP module entry point +/* FIXME - should we trap for a null pointer here? */ + +/* The SVC mode case */ +__und_svc: save_svc_regs @ Non-user mode + mask_pc r0, lr + and r2, lr, #3 + sub r0, r0, #4 + mov r1, sp + bl do_undefinstr + restore_svc_regs + +/* We get here if the FP emulator doesnt handle the undef instr. + * If the insn WAS handled, the emulator jumps to ret_from_exception by itself/ + */ + .globl fpundefinstr +fpundefinstr: + mov r0, lr + mov r1, sp + teqp pc, #MODE_SVC26 + bl do_undefinstr + b ret_from_exception @ Normal FP exit + +#if defined CONFIG_FPE_NWFPE || defined CONFIG_FPE_FASTFPE + /* The FPE is always present */ + .equ fpe_not_present, 0 +#else +/* We get here if an undefined instruction happens and the floating + * point emulator is not present. If the offending instruction was + * a WFS, we just perform a normal return as if we had emulated the + * operation. This is a hack to allow some basic userland binaries + * to run so that the emulator module proper can be loaded. --philb + * FIXME - probably a broken useless hack... + */ +fpe_not_present: + adr r10, wfs_mask_data + ldmia r10, {r4, r5, r6, r7, r8} + ldr r10, [sp, #S_PC] @ Load PC + sub r10, r10, #4 + mask_pc r10, r10 + ldrt r10, [r10] @ get instruction + and r5, r10, r5 + teq r5, r4 @ Is it WFS? + beq ret_from_exception + and r5, r10, r8 + teq r5, r6 @ Is it LDF/STF on sp or fp? + teqne r5, r7 + bne fpundefinstr + tst r10, #0x00200000 @ Does it have WB + beq ret_from_exception + and r4, r10, #255 @ get offset + and r6, r10, #0x000f0000 + tst r10, #0x00800000 @ +/- + ldr r5, [sp, r6, lsr #14] @ Load reg + rsbeq r4, r4, #0 + add r5, r5, r4, lsl #2 + str r5, [sp, r6, lsr #14] @ Save reg + b ret_from_exception + +wfs_mask_data: .word 0x0e200110 @ WFS/RFS + .word 0x0fef0fff + .word 0x0d0d0100 @ LDF [sp]/STF [sp] + .word 0x0d0b0100 @ LDF [fp]/STF [fp] + .word 0x0f0f0f00 +#endif + +.LC2: .word fp_enter + +/*============================================================================= + * Prefetch abort handler + *----------------------------------------------------------------------------- + */ +#define DEBUG_UNDEF +/* remember: lr = USR pc */ +vector_prefetch: + sub lr, lr, #4 + tst lr, #MODE_SVC26 + bne __pabt_invalid + save_user_regs + teqp pc, #MODE_SVC26 @ Enable IRQs... + mask_pc r0, lr @ Address of abort + mov r1, sp @ Tasks registers + bl do_PrefetchAbort + teq r0, #0 @ If non-zero, we believe this abort.. + bne ret_from_exception +#ifdef DEBUG_UNDEF + adr r0, t + bl printk +#endif + ldr lr, [sp,#S_PC] @ FIXME program to test this on. I think its + b .Lbug_undef @ broken at the moment though!) + +__pabt_invalid: save_svc_regs + mov r0, sp @ Prefetch aborts are definitely *not* + mov r1, #BAD_PREFETCH @ allowed in non-user modes. We cant + and r2, lr, #3 @ recover from this problem. + b bad_mode + +#ifdef DEBUG_UNDEF +t: .ascii "*** undef ***\r\n\0" + .align +#endif + +/*============================================================================= + * Address exception handler + *----------------------------------------------------------------------------- + * These aren't too critical. + * (they're not supposed to happen). + * In order to debug the reason for address exceptions in non-user modes, + * we have to obtain all the registers so that we can see what's going on. + */ + +vector_addrexcptn: + sub lr, lr, #8 + tst lr, #3 + bne Laddrexcptn_not_user + save_user_regs + teq pc, #MODE_SVC26 + mask_pc r0, lr @ Point to instruction + mov r1, sp @ Point to registers + mov r2, #0x400 + mov lr, pc + bl do_excpt + b ret_from_exception + +Laddrexcptn_not_user: + save_svc_regs + and r2, lr, #3 + teq r2, #3 + bne Laddrexcptn_illegal_mode + teqp pc, #MODE_SVC26 + mask_pc r0, lr + mov r1, sp + orr r2, r2, #0x400 + bl do_excpt + ldmia sp, {r0 - lr} @ I cant remember the reason I changed this... + add sp, sp, #15*4 + movs pc, lr + +Laddrexcptn_illegal_mode: + mov r0, sp + str lr, [sp, #-4]! + orr r1, r2, #PSR_I_BIT | PSR_F_BIT + teqp r1, #0 @ change into mode (wont be user mode) + mov r0, r0 + mov r1, r8 @ Any register from r8 - r14 can be banked + mov r2, r9 + mov r3, r10 + mov r4, r11 + mov r5, r12 + mov r6, r13 + mov r7, r14 + teqp pc, #PSR_F_BIT | MODE_SVC26 @ back to svc + mov r0, r0 + stmfd sp!, {r1-r7} + ldmia r0, {r0-r7} + stmfd sp!, {r0-r7} + mov r0, sp + mov r1, #BAD_ADDREXCPTN + b bad_mode + +/*============================================================================= + * Interrupt (IRQ) handler + *----------------------------------------------------------------------------- + * Note: if the IRQ was taken whilst in user mode, then *no* kernel routine + * is running, so do not have to save svc lr. + * + * Entered in IRQ mode. + */ + +vector_IRQ: ldr sp, .LCirq @ Setup some temporary stack + sub lr, lr, #4 + str lr, [sp] @ push return address + + tst lr, #3 + bne __irq_non_usr + +__irq_usr: teqp pc, #PSR_I_BIT | MODE_SVC26 @ Enter SVC mode + mov r0, r0 + + ldr lr, .LCirq + ldr lr, [lr] @ Restore lr for jump back to USR + + save_user_regs + + handle_irq + + mov why, #0 + get_thread_info tsk + b ret_to_user + +@ Place the IRQ priority table here so that the handle_irq macros above +@ and below here can access it. + + irq_prio_table + +__irq_non_usr: teqp pc, #PSR_I_BIT | MODE_SVC26 @ Enter SVC mode + mov r0, r0 + + save_svc_regs_irq + + and r2, lr, #3 + teq r2, #3 + bne __irq_invalid @ IRQ not from SVC mode + + handle_irq + + restore_svc_regs + +__irq_invalid: mov r0, sp + mov r1, #BAD_IRQ + b bad_mode + +/*============================================================================= + * Data abort handler code + *----------------------------------------------------------------------------- + * + * This handles both exceptions from user and SVC modes, computes the address + * range of the problem, and does any correction that is required. It then + * calls the kernel data abort routine. + * + * This is where I wish that the ARM would tell you which address aborted. + */ + +vector_data: sub lr, lr, #8 @ Correct lr + tst lr, #3 + bne Ldata_not_user + save_user_regs + teqp pc, #MODE_SVC26 + mask_pc r0, lr + bl Ldata_do + b ret_from_exception + +Ldata_not_user: + save_svc_regs + and r2, lr, #3 + teq r2, #3 + bne Ldata_illegal_mode + tst lr, #PSR_I_BIT + teqeqp pc, #MODE_SVC26 + mask_pc r0, lr + bl Ldata_do + restore_svc_regs + +Ldata_illegal_mode: + mov r0, sp + mov r1, #BAD_DATA + b bad_mode + +Ldata_do: mov r3, sp + ldr r4, [r0] @ Get instruction + mov r2, #0 + tst r4, #1 << 20 @ Check to see if it is a write instruction + orreq r2, r2, #FAULT_CODE_WRITE @ Indicate write instruction + mov r1, r4, lsr #22 @ Now branch to the relevent processing routine + and r1, r1, #15 << 2 + add pc, pc, r1 + movs pc, lr + b Ldata_unknown + b Ldata_unknown + b Ldata_unknown + b Ldata_unknown + b Ldata_ldrstr_post @ ldr rd, [rn], #m + b Ldata_ldrstr_numindex @ ldr rd, [rn, #m] @ RegVal + b Ldata_ldrstr_post @ ldr rd, [rn], rm + b Ldata_ldrstr_regindex @ ldr rd, [rn, rm] + b Ldata_ldmstm @ ldm*a rn, <rlist> + b Ldata_ldmstm @ ldm*b rn, <rlist> + b Ldata_unknown + b Ldata_unknown + b Ldata_ldrstr_post @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m + b Ldata_ldcstc_pre @ ldc rd, [rn, #m] + b Ldata_unknown +Ldata_unknown: @ Part of jumptable + mov r0, r1 + mov r1, r4 + mov r2, r3 + b baddataabort + +Ldata_ldrstr_post: + mov r0, r4, lsr #14 @ Get Rn + and r0, r0, #15 << 2 @ Mask out reg. + teq r0, #15 << 2 + ldr r0, [r3, r0] @ Get register + biceq r0, r0, #PCMASK + mov r1, r0 +#ifdef FAULT_CODE_LDRSTRPOST + orr r2, r2, #FAULT_CODE_LDRSTRPOST +#endif + b do_DataAbort + +Ldata_ldrstr_numindex: + mov r0, r4, lsr #14 @ Get Rn + and r0, r0, #15 << 2 @ Mask out reg. + teq r0, #15 << 2 + ldr r0, [r3, r0] @ Get register + mov r1, r4, lsl #20 + biceq r0, r0, #PCMASK + tst r4, #1 << 23 + addne r0, r0, r1, lsr #20 + subeq r0, r0, r1, lsr #20 + mov r1, r0 +#ifdef FAULT_CODE_LDRSTRPRE + orr r2, r2, #FAULT_CODE_LDRSTRPRE +#endif + b do_DataAbort + +Ldata_ldrstr_regindex: + mov r0, r4, lsr #14 @ Get Rn + and r0, r0, #15 << 2 @ Mask out reg. + teq r0, #15 << 2 + ldr r0, [r3, r0] @ Get register + and r7, r4, #15 + biceq r0, r0, #PCMASK + teq r7, #15 @ Check for PC + ldr r7, [r3, r7, lsl #2] @ Get Rm + and r8, r4, #0x60 @ Get shift types + biceq r7, r7, #PCMASK + mov r9, r4, lsr #7 @ Get shift amount + and r9, r9, #31 + teq r8, #0 + moveq r7, r7, lsl r9 + teq r8, #0x20 @ LSR shift + moveq r7, r7, lsr r9 + teq r8, #0x40 @ ASR shift + moveq r7, r7, asr r9 + teq r8, #0x60 @ ROR shift + moveq r7, r7, ror r9 + tst r4, #1 << 23 + addne r0, r0, r7 + subeq r0, r0, r7 @ Apply correction + mov r1, r0 +#ifdef FAULT_CODE_LDRSTRREG + orr r2, r2, #FAULT_CODE_LDRSTRREG +#endif + b do_DataAbort + +Ldata_ldmstm: + mov r7, #0x11 + orr r7, r7, r7, lsl #8 + and r0, r4, r7 + and r1, r4, r7, lsl #1 + add r0, r0, r1, lsr #1 + and r1, r4, r7, lsl #2 + add r0, r0, r1, lsr #2 + and r1, r4, r7, lsl #3 + add r0, r0, r1, lsr #3 + add r0, r0, r0, lsr #8 + add r0, r0, r0, lsr #4 + and r7, r0, #15 @ r7 = no. of registers to transfer. + mov r5, r4, lsr #14 @ Get Rn + and r5, r5, #15 << 2 + ldr r0, [r3, r5] @ Get reg + eor r6, r4, r4, lsl #2 + tst r6, #1 << 23 @ Check inc/dec ^ writeback + rsbeq r7, r7, #0 + add r7, r0, r7, lsl #2 @ Do correction (signed) + subne r1, r7, #1 + subeq r1, r0, #1 + moveq r0, r7 + tst r4, #1 << 21 @ Check writeback + strne r7, [r3, r5] + eor r6, r4, r4, lsl #1 + tst r6, #1 << 24 @ Check Pre/Post ^ inc/dec + addeq r0, r0, #4 + addeq r1, r1, #4 + teq r5, #15*4 @ CHECK FOR PC + biceq r1, r1, #PCMASK + biceq r0, r0, #PCMASK +#ifdef FAULT_CODE_LDMSTM + orr r2, r2, #FAULT_CODE_LDMSTM +#endif + b do_DataAbort + +Ldata_ldcstc_pre: + mov r0, r4, lsr #14 @ Get Rn + and r0, r0, #15 << 2 @ Mask out reg. + teq r0, #15 << 2 + ldr r0, [r3, r0] @ Get register + mov r1, r4, lsl #24 @ Get offset + biceq r0, r0, #PCMASK + tst r4, #1 << 23 + addne r0, r0, r1, lsr #24 + subeq r0, r0, r1, lsr #24 + mov r1, r0 +#ifdef FAULT_CODE_LDCSTC + orr r2, r2, #FAULT_CODE_LDCSTC +#endif + b do_DataAbort + + +/* + * This is the return code to user mode for abort handlers + */ +ENTRY(ret_from_exception) + get_thread_info tsk + mov why, #0 + b ret_to_user + + .data +ENTRY(fp_enter) + .word fpe_not_present + .text +/* + * Register switch for older 26-bit only ARMs + */ +ENTRY(__switch_to) + add r0, r0, #TI_CPU_SAVE + stmia r0, {r4 - sl, fp, sp, lr} + add r1, r1, #TI_CPU_SAVE + ldmia r1, {r4 - sl, fp, sp, pc}^ + +/* + *============================================================================= + * Low-level interface code + *----------------------------------------------------------------------------- + * Trap initialisation + *----------------------------------------------------------------------------- + * + * Note - FIQ code has changed. The default is a couple of words in 0x1c, 0x20 + * that call _unexp_fiq. Nowever, we now copy the FIQ routine to 0x1c (removes + * some excess cycles). + * + * What we need to put into 0-0x1c are branches to branch to the kernel. + */ + + .section ".init.text",#alloc,#execinstr + +.Ljump_addresses: + swi SYS_ERROR0 + .word vector_undefinstr - 12 + .word vector_swi - 16 + .word vector_prefetch - 20 + .word vector_data - 24 + .word vector_addrexcptn - 28 + .word vector_IRQ - 32 + .word _unexp_fiq - 36 + b . + 8 +/* + * initialise the trap system + */ +ENTRY(__trap_init) + stmfd sp!, {r4 - r7, lr} + adr r1, .Ljump_addresses + ldmia r1, {r1 - r7, ip, lr} + orr r2, lr, r2, lsr #2 + orr r3, lr, r3, lsr #2 + orr r4, lr, r4, lsr #2 + orr r5, lr, r5, lsr #2 + orr r6, lr, r6, lsr #2 + orr r7, lr, r7, lsr #2 + orr ip, lr, ip, lsr #2 + mov r0, #0 + stmia r0, {r1 - r7, ip} + ldmfd sp!, {r4 - r7, pc}^ + + .bss +__temp_irq: .space 4 @ saved lr_irq +__temp_fiq: .space 128 diff --git a/arch/arm26/kernel/fiq.c b/arch/arm26/kernel/fiq.c new file mode 100644 index 000000000000..08a97c9498ff --- /dev/null +++ b/arch/arm26/kernel/fiq.c @@ -0,0 +1,202 @@ +/* + * linux/arch/arm26/kernel/fiq.c + * + * Copyright (C) 1998 Russell King + * Copyright (C) 1998, 1999 Phil Blundell + * Copyright (C) 2003 Ian Molton + * + * FIQ support written by Philip Blundell <philb@gnu.org>, 1998. + * + * FIQ support re-written by Russell King to be more generic + * + * We now properly support a method by which the FIQ handlers can + * be stacked onto the vector. We still do not support sharing + * the FIQ vector itself. + * + * Operation is as follows: + * 1. Owner A claims FIQ: + * - default_fiq relinquishes control. + * 2. Owner A: + * - inserts code. + * - sets any registers, + * - enables FIQ. + * 3. Owner B claims FIQ: + * - if owner A has a relinquish function. + * - disable FIQs. + * - saves any registers. + * - returns zero. + * 4. Owner B: + * - inserts code. + * - sets any registers, + * - enables FIQ. + * 5. Owner B releases FIQ: + * - Owner A is asked to reacquire FIQ: + * - inserts code. + * - restores saved registers. + * - enables FIQ. + * 6. Goto 3 + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/init.h> +#include <linux/seq_file.h> + +#include <asm/fiq.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/pgalloc.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +#define FIQ_VECTOR (vectors_base() + 0x1c) + +static unsigned long no_fiq_insn; + +#define unprotect_page_0() +#define protect_page_0() + +/* Default reacquire function + * - we always relinquish FIQ control + * - we always reacquire FIQ control + */ +static int fiq_def_op(void *ref, int relinquish) +{ + if (!relinquish) { + unprotect_page_0(); + *(unsigned long *)FIQ_VECTOR = no_fiq_insn; + protect_page_0(); + } + + return 0; +} + +static struct fiq_handler default_owner = { + .name = "default", + .fiq_op = fiq_def_op, +}; + +static struct fiq_handler *current_fiq = &default_owner; + +int show_fiq_list(struct seq_file *p, void *v) +{ + if (current_fiq != &default_owner) + seq_printf(p, "FIQ: %s\n", current_fiq->name); + + return 0; +} + +void set_fiq_handler(void *start, unsigned int length) +{ + unprotect_page_0(); + + memcpy((void *)FIQ_VECTOR, start, length); + + protect_page_0(); +} + +/* + * Taking an interrupt in FIQ mode is death, so both these functions + * disable irqs for the duration. + */ +void set_fiq_regs(struct pt_regs *regs) +{ + register unsigned long tmp, tmp2; + __asm__ volatile ( + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, %3 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + ldmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" + : "=&r" (tmp), "=&r" (tmp2) + : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. */ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); +} + +void get_fiq_regs(struct pt_regs *regs) +{ + register unsigned long tmp, tmp2; + __asm__ volatile ( + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, %3 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + stmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" + : "=&r" (tmp), "=&r" (tmp2) + : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. */ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); +} + +int claim_fiq(struct fiq_handler *f) +{ + int ret = 0; + + if (current_fiq) { + ret = -EBUSY; + + if (current_fiq->fiq_op != NULL) + ret = current_fiq->fiq_op(current_fiq->dev_id, 1); + } + + if (!ret) { + f->next = current_fiq; + current_fiq = f; + } + + return ret; +} + +void release_fiq(struct fiq_handler *f) +{ + if (current_fiq != f) { + printk(KERN_ERR "%s FIQ trying to release %s FIQ\n", + f->name, current_fiq->name); +#ifdef CONFIG_DEBUG_ERRORS + __backtrace(); +#endif + return; + } + + do + current_fiq = current_fiq->next; + while (current_fiq->fiq_op(current_fiq->dev_id, 0)); +} + +void enable_fiq(int fiq) +{ + enable_irq(fiq + FIQ_START); +} + +void disable_fiq(int fiq) +{ + disable_irq(fiq + FIQ_START); +} + +EXPORT_SYMBOL(set_fiq_handler); +EXPORT_SYMBOL(set_fiq_regs); +EXPORT_SYMBOL(get_fiq_regs); +EXPORT_SYMBOL(claim_fiq); +EXPORT_SYMBOL(release_fiq); +EXPORT_SYMBOL(enable_fiq); +EXPORT_SYMBOL(disable_fiq); + +void __init init_FIQ(void) +{ + no_fiq_insn = *(unsigned long *)FIQ_VECTOR; + set_fs(get_fs()); +} diff --git a/arch/arm26/kernel/head.S b/arch/arm26/kernel/head.S new file mode 100644 index 000000000000..8bfc62539ba6 --- /dev/null +++ b/arch/arm26/kernel/head.S @@ -0,0 +1,113 @@ +/* + * linux/arch/arm26/kernel/head.S + * + * Copyright (C) 1994-2000 Russell King + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 26-bit kernel startup code + */ +#include <linux/config.h> +#include <linux/linkage.h> +#include <asm/mach-types.h> + + .globl swapper_pg_dir + .equ swapper_pg_dir, 0x0207d000 + +/* + * Entry point. + */ + .section ".init.text",#alloc,#execinstr +ENTRY(stext) + +__entry: + cmp pc, #0x02000000 + ldrlt pc, LC0 @ if 0x01800000, call at 0x02080000 + teq r0, #0 @ Check for old calling method + blne oldparams @ Move page if old + + adr r0, LC0 + ldmib r0, {r2-r5, sp} @ Setup stack (and fetch other values) + + mov r0, #0 @ Clear BSS +1: cmp r2, r3 + strcc r0, [r2], #4 + bcc 1b + + bl detect_proc_type + str r0, [r4] + bl detect_arch_type + str r0, [r5] + +#ifdef CONFIG_XIP_KERNEL + ldr r3, ETEXT @ data section copy + ldr r4, SDATA + ldr r5, EDATA +1: + ldr r6, [r3], #4 + str r6, [r4], #4 + cmp r4, r5 + blt 1b +#endif + mov fp, #0 + b start_kernel + +LC0: .word _stext + .word __bss_start @ r2 + .word _end @ r3 + .word processor_id @ r4 + .word __machine_arch_type @ r5 + .word init_thread_union+8192 @ sp +#ifdef CONFIG_XIP_KERNEL +ETEXT: .word _endtext +SDATA: .word _sdata +EDATA: .word __bss_start +#endif + +arm2_id: .long 0x41560200 @ ARM2 and 250 dont have a CPUID +arm250_id: .long 0x41560250 @ So we create some after probing for them + .align + +oldparams: mov r4, #0x02000000 + add r3, r4, #0x00080000 + add r4, r4, #0x0007c000 +1: ldmia r0!, {r5 - r12} + stmia r4!, {r5 - r12} + cmp r4, r3 + blt 1b + mov pc, lr + +/* + * We need some way to automatically detect the difference between + * these two machines. Unfortunately, it is not possible to detect + * the presence of the SuperIO chip, because that will hang the old + * Archimedes machines solid. + */ +/* DAG: Outdated, these have been combined !!!!!!! */ +detect_arch_type: +#if defined(CONFIG_ARCH_ARC) + mov r0, #MACH_TYPE_ARCHIMEDES +#elif defined(CONFIG_ARCH_A5K) + mov r0, #MACH_TYPE_A5K +#endif + mov pc, lr + +detect_proc_type: + mov ip, lr + mov r2, #0xea000000 @ Point undef instr to continuation + adr r0, continue - 12 + orr r0, r2, r0, lsr #2 + mov r1, #0 + str r0, [r1, #4] + ldr r0, arm2_id + swp r2, r2, [r1] @ check for swp (ARM2 cant) + ldr r0, arm250_id + mrc 15, 0, r3, c0, c0 @ check for CP#15 (ARM250 cant) + mov r0, r3 +continue: mov r2, #0xeb000000 @ Make undef vector loop + sub r2, r2, #2 + str r2, [r1, #4] + mov pc, ip diff --git a/arch/arm26/kernel/init_task.c b/arch/arm26/kernel/init_task.c new file mode 100644 index 000000000000..4191565b889b --- /dev/null +++ b/arch/arm26/kernel/init_task.c @@ -0,0 +1,49 @@ +/* + * linux/arch/arm26/kernel/init_task.c + * + * Copyright (C) 2003 Ian Molton + * + */ +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. + * + * We need to make sure that this is 8192-byte aligned due to the + * way process stacks are handled. This is done by making sure + * the linker maps this in the .text segment right after head.S, + * and making the linker scripts ensure the proper alignment. + * + * FIXME - should this be 32K alignment on arm26? + * + * The things we do for performance... + */ +union thread_union init_thread_union + __attribute__((__section__(".init.task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/arm26/kernel/irq.c b/arch/arm26/kernel/irq.c new file mode 100644 index 000000000000..f3cc1036e5bc --- /dev/null +++ b/arch/arm26/kernel/irq.c @@ -0,0 +1,716 @@ +/* + * linux/arch/arm/kernel/irq.c + * + * Copyright (C) 1992 Linus Torvalds + * Modifications for ARM processor Copyright (C) 1995-2000 Russell King. + * 'Borrowed' for ARM26 and (C) 2003 Ian Molton. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * IRQ's are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/errno.h> + +#include <asm/irq.h> +#include <asm/system.h> +#include <asm/irqchip.h> + +//FIXME - this ought to be in a header IMO +void __init arc_init_irq(void); + +/* + * Maximum IRQ count. Currently, this is arbitary. However, it should + * not be set too low to prevent false triggering. Conversely, if it + * is set too high, then you could miss a stuck IRQ. + * + * FIXME Maybe we ought to set a timer and re-enable the IRQ at a later time? + */ +#define MAX_IRQ_CNT 100000 + +static volatile unsigned long irq_err_count; +static DEFINE_SPINLOCK(irq_controller_lock); + +struct irqdesc irq_desc[NR_IRQS]; + +/* + * Dummy mask/unmask handler + */ +void dummy_mask_unmask_irq(unsigned int irq) +{ +} + +void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +{ + irq_err_count += 1; + printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq); +} + +static struct irqchip bad_chip = { + .ack = dummy_mask_unmask_irq, + .mask = dummy_mask_unmask_irq, + .unmask = dummy_mask_unmask_irq, +}; + +static struct irqdesc bad_irq_desc = { + .chip = &bad_chip, + .handle = do_bad_IRQ, + .depth = 1, +}; + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. We do this lazily. + * + * This function may be called from IRQ context. + */ +void disable_irq(unsigned int irq) +{ + struct irqdesc *desc = irq_desc + irq; + unsigned long flags; + spin_lock_irqsave(&irq_controller_lock, flags); + if (!desc->depth++) + desc->enabled = 0; + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/** + * enable_irq - enable interrupt handling on an irq + * @irq: Interrupt to enable + * + * Re-enables the processing of interrupts on this IRQ line. + * Note that this may call the interrupt handler, so you may + * get unexpected results if you hold IRQs disabled. + * + * This function may be called from IRQ context. + */ +void enable_irq(unsigned int irq) +{ + struct irqdesc *desc = irq_desc + irq; + unsigned long flags; + int pending = 0; + + spin_lock_irqsave(&irq_controller_lock, flags); + if (unlikely(!desc->depth)) { + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); //FIXME bum addresses reported - why? + } else if (!--desc->depth) { + desc->probing = 0; + desc->enabled = 1; + desc->chip->unmask(irq); + pending = desc->pending; + desc->pending = 0; + /* + * If the interrupt was waiting to be processed, + * retrigger it. + */ + if (pending) + desc->chip->rerun(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v; + struct irqaction * action; + + if (i < NR_IRQS) { + action = irq_desc[i].action; + if (!action) + continue; + seq_printf(p, "%3d: %10u ", i, kstat_irqs(i)); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) { + seq_printf(p, ", %s", action->name); + } + seq_putc(p, '\n'); + } else if (i == NR_IRQS) { + show_fiq_list(p, v); + seq_printf(p, "Err: %10lu\n", irq_err_count); + } + return 0; +} + +/* + * IRQ lock detection. + * + * Hopefully, this should get us out of a few locked situations. + * However, it may take a while for this to happen, since we need + * a large number if IRQs to appear in the same jiffie with the + * same instruction pointer (or within 2 instructions). + */ +static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) +{ + unsigned long instr_ptr = instruction_pointer(regs); + + if (desc->lck_jif == jiffies && + desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { + desc->lck_cnt += 1; + + if (desc->lck_cnt > MAX_IRQ_CNT) { + printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); + return 1; + } + } else { + desc->lck_cnt = 0; + desc->lck_pc = instruction_pointer(regs); + desc->lck_jif = jiffies; + } + return 0; +} + +static void +__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) +{ + unsigned int status; + int ret; + + spin_unlock(&irq_controller_lock); + if (!(action->flags & SA_INTERRUPT)) + local_irq_enable(); + + status = 0; + do { + ret = action->handler(irq, action->dev_id, regs); + if (ret == IRQ_HANDLED) + status |= action->flags; + action = action->next; + } while (action); + + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + + spin_lock_irq(&irq_controller_lock); +} + +/* + * This is for software-decoded IRQs. The caller is expected to + * handle the ack, clear, mask and unmask issues. + */ +void +do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +{ + struct irqaction *action; + const int cpu = smp_processor_id(); + + desc->triggered = 1; + + kstat_cpu(cpu).irqs[irq]++; + + action = desc->action; + if (action) + __do_irq(irq, desc->action, regs); +} + +/* + * Most edge-triggered IRQ implementations seem to take a broken + * approach to this. Hence the complexity. + */ +void +do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +{ + const int cpu = smp_processor_id(); + + desc->triggered = 1; + + /* + * If we're currently running this IRQ, or its disabled, + * we shouldn't process the IRQ. Instead, turn on the + * hardware masks. + */ + if (unlikely(desc->running || !desc->enabled)) + goto running; + + /* + * Acknowledge and clear the IRQ, but don't mask it. + */ + desc->chip->ack(irq); + + /* + * Mark the IRQ currently in progress. + */ + desc->running = 1; + + kstat_cpu(cpu).irqs[irq]++; + + do { + struct irqaction *action; + + action = desc->action; + if (!action) + break; + + if (desc->pending && desc->enabled) { + desc->pending = 0; + desc->chip->unmask(irq); + } + + __do_irq(irq, action, regs); + } while (desc->pending); + + desc->running = 0; + + /* + * If we were disabled or freed, shut down the handler. + */ + if (likely(desc->action && !check_irq_lock(desc, irq, regs))) + return; + + running: + /* + * We got another IRQ while this one was masked or + * currently running. Delay it. + */ + desc->pending = 1; + desc->chip->mask(irq); + desc->chip->ack(irq); +} + +/* + * Level-based IRQ handler. Nice and simple. + */ +void +do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +{ + struct irqaction *action; + const int cpu = smp_processor_id(); + + desc->triggered = 1; + + /* + * Acknowledge, clear _AND_ disable the interrupt. + */ + desc->chip->ack(irq); + + if (likely(desc->enabled)) { + kstat_cpu(cpu).irqs[irq]++; + + /* + * Return with this interrupt masked if no action + */ + action = desc->action; + if (action) { + __do_irq(irq, desc->action, regs); + + if (likely(desc->enabled && + !check_irq_lock(desc, irq, regs))) + desc->chip->unmask(irq); + } + } +} + +/* + * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * come via this function. Instead, they should provide their + * own 'handler' + */ +asmlinkage void asm_do_IRQ(int irq, struct pt_regs *regs) +{ + struct irqdesc *desc = irq_desc + irq; + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. + */ + if (irq >= NR_IRQS) + desc = &bad_irq_desc; + + irq_enter(); + spin_lock(&irq_controller_lock); + desc->handle(irq, desc, regs); + spin_unlock(&irq_controller_lock); + irq_exit(); +} + +void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained) +{ + struct irqdesc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq); + return; + } + + if (handle == NULL) + handle = do_bad_IRQ; + + desc = irq_desc + irq; + + if (is_chained && desc->chip == &bad_chip) + printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + if (handle == do_bad_IRQ) { + desc->chip->mask(irq); + desc->chip->ack(irq); + desc->depth = 1; + desc->enabled = 0; + } + desc->handle = handle; + if (handle != do_bad_IRQ && is_chained) { + desc->valid = 0; + desc->probe_ok = 0; + desc->depth = 0; + desc->chip->unmask(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +void set_irq_chip(unsigned int irq, struct irqchip *chip) +{ + struct irqdesc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq); + return; + } + + if (chip == NULL) + chip = &bad_chip; + + desc = irq_desc + irq; + spin_lock_irqsave(&irq_controller_lock, flags); + desc->chip = chip; + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +int set_irq_type(unsigned int irq, unsigned int type) +{ + struct irqdesc *desc; + unsigned long flags; + int ret = -ENXIO; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); + return -ENODEV; + } + + desc = irq_desc + irq; + if (desc->chip->type) { + spin_lock_irqsave(&irq_controller_lock, flags); + ret = desc->chip->type(irq, type); + spin_unlock_irqrestore(&irq_controller_lock, flags); + } + + return ret; +} + +void set_irq_flags(unsigned int irq, unsigned int iflags) +{ + struct irqdesc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq); + return; + } + + desc = irq_desc + irq; + spin_lock_irqsave(&irq_controller_lock, flags); + desc->valid = (iflags & IRQF_VALID) != 0; + desc->probe_ok = (iflags & IRQF_PROBE) != 0; + desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0; + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +int setup_irq(unsigned int irq, struct irqaction *new) +{ + int shared = 0; + struct irqaction *old, **p; + unsigned long flags; + struct irqdesc *desc; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + desc = irq_desc + irq; + spin_lock_irqsave(&irq_controller_lock, flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&irq_controller_lock, flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->probing = 0; + desc->running = 0; + desc->pending = 0; + desc->depth = 1; + if (!desc->noautoenable) { + desc->depth = 0; + desc->enabled = 1; + desc->chip->unmask(irq); + } + } + + spin_unlock_irqrestore(&irq_controller_lock, flags); + return 0; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +//FIXME - handler used to return void - whats the significance of the change? +int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irq_flags, const char * devname, void *dev_id) +{ + unsigned long retval; + struct irqaction *action; + + if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler || + (irq_flags & SA_SHIRQ && !dev_id)) + return -EINVAL; + + action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irq_flags; + cpus_clear(action->mask); + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + + if (retval) + kfree(action); + return retval; +} + +EXPORT_SYMBOL(request_irq); + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. + * + * This function may be called from interrupt context. + */ +void free_irq(unsigned int irq, void *dev_id) +{ + struct irqaction * action, **p; + unsigned long flags; + + if (irq >= NR_IRQS || !irq_desc[irq].valid) { + printk(KERN_ERR "Trying to free IRQ%d\n",irq); +#ifdef CONFIG_DEBUG_ERRORS + __backtrace(); +#endif + return; + } + + spin_lock_irqsave(&irq_controller_lock, flags); + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { + if (action->dev_id != dev_id) + continue; + + /* Found it - now free it */ + *p = action->next; + kfree(action); + goto out; + } + printk(KERN_ERR "Trying to free free IRQ%d\n",irq); +#ifdef CONFIG_DEBUG_ERRORS + __backtrace(); +#endif +out: + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +EXPORT_SYMBOL(free_irq); + +/* Start the interrupt probing. Unlike other architectures, + * we don't return a mask of interrupts from probe_irq_on, + * but return the number of interrupts enabled for the probe. + * The interrupts which have been enabled for probing is + * instead recorded in the irq_desc structure. + */ +unsigned long probe_irq_on(void) +{ + unsigned int i, irqs = 0; + unsigned long delay; + + /* + * first snaffle up any unassigned but + * probe-able interrupts + */ + spin_lock_irq(&irq_controller_lock); + for (i = 0; i < NR_IRQS; i++) { + if (!irq_desc[i].probe_ok || irq_desc[i].action) + continue; + + irq_desc[i].probing = 1; + irq_desc[i].triggered = 0; + if (irq_desc[i].chip->type) + irq_desc[i].chip->type(i, IRQT_PROBE); + irq_desc[i].chip->unmask(i); + irqs += 1; + } + spin_unlock_irq(&irq_controller_lock); + + /* + * wait for spurious interrupts to mask themselves out again + */ + for (delay = jiffies + HZ/10; time_before(jiffies, delay); ) + /* min 100ms delay */; + + /* + * now filter out any obviously spurious interrupts + */ + spin_lock_irq(&irq_controller_lock); + for (i = 0; i < NR_IRQS; i++) { + if (irq_desc[i].probing && irq_desc[i].triggered) { + irq_desc[i].probing = 0; + irqs -= 1; + } + } + spin_unlock_irq(&irq_controller_lock); + + return irqs; +} + +EXPORT_SYMBOL(probe_irq_on); + +/* + * Possible return values: + * >= 0 - interrupt number + * -1 - no interrupt/many interrupts + */ +int probe_irq_off(unsigned long irqs) +{ + unsigned int i; + int irq_found = NO_IRQ; + + /* + * look at the interrupts, and find exactly one + * that we were probing has been triggered + */ + spin_lock_irq(&irq_controller_lock); + for (i = 0; i < NR_IRQS; i++) { + if (irq_desc[i].probing && + irq_desc[i].triggered) { + if (irq_found != NO_IRQ) { + irq_found = NO_IRQ; + goto out; + } + irq_found = i; + } + } + + if (irq_found == -1) + irq_found = NO_IRQ; +out: + spin_unlock_irq(&irq_controller_lock); + + return irq_found; +} + +EXPORT_SYMBOL(probe_irq_off); + +void __init init_irq_proc(void) +{ +} + +void __init init_IRQ(void) +{ + struct irqdesc *desc; + extern void init_dma(void); + int irq; + + for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) + *desc = bad_irq_desc; + + arc_init_irq(); + init_dma(); +} diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c new file mode 100644 index 000000000000..46aea6ac194d --- /dev/null +++ b/arch/arm26/kernel/process.c @@ -0,0 +1,401 @@ +/* + * linux/arch/arm26/kernel/process.c + * + * Copyright (C) 2003 Ian Molton - adapted for ARM26 + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Origional Copyright (C) 1995 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <stdarg.h> + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/leds.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +extern const char *processor_modes[]; +extern void setup_mm_for_reboot(char mode); + +static volatile int hlt_counter; + +void disable_hlt(void) +{ + hlt_counter++; +} + +EXPORT_SYMBOL(disable_hlt); + +void enable_hlt(void) +{ + hlt_counter--; +} + +EXPORT_SYMBOL(enable_hlt); + +static int __init nohlt_setup(char *__unused) +{ + hlt_counter = 1; + return 1; +} + +static int __init hlt_setup(char *__unused) +{ + hlt_counter = 0; + return 1; +} + +__setup("nohlt", nohlt_setup); +__setup("hlt", hlt_setup); + +/* + * This is our default idle handler. We need to disable + * interrupts here to ensure we don't miss a wakeup call. + */ +void cpu_idle(void) +{ + /* endless idle loop with no priority at all */ + preempt_disable(); + while (1) { + while (!need_resched()) { + local_irq_disable(); + if (!need_resched() && !hlt_counter) + local_irq_enable(); + } + } + schedule(); +} + +static char reboot_mode = 'h'; + +int __init reboot_setup(char *str) +{ + reboot_mode = str[0]; + return 1; +} + +__setup("reboot=", reboot_setup); + +/* ARM26 cant do these but we still need to define them. */ +void machine_halt(void) +{ +} +void machine_power_off(void) +{ +} + +EXPORT_SYMBOL(machine_halt); +EXPORT_SYMBOL(machine_power_off); + +void machine_restart(char * __unused) +{ + /* + * Clean and disable cache, and turn off interrupts + */ + cpu_proc_fin(); + + /* + * Tell the mm system that we are going to reboot - + * we may need it to insert some 1:1 mappings so that + * soft boot works. + */ + setup_mm_for_reboot(reboot_mode); + + /* + * copy branch instruction to reset location and call it + */ + + *(unsigned long *)0 = *(unsigned long *)0x03800000; + ((void(*)(void))0)(); + + /* + * Whoops - the architecture was unable to reboot. + * Tell the user! Should never happen... + */ + mdelay(1000); + printk("Reboot failed -- System halted\n"); + while (1); +} + +EXPORT_SYMBOL(machine_restart); + +void show_regs(struct pt_regs * regs) +{ + unsigned long flags; + + flags = condition_codes(regs); + + printk("pc : [<%08lx>] lr : [<%08lx>] %s\n" + "sp : %08lx ip : %08lx fp : %08lx\n", + instruction_pointer(regs), + regs->ARM_lr, print_tainted(), regs->ARM_sp, + regs->ARM_ip, regs->ARM_fp); + printk("r10: %08lx r9 : %08lx r8 : %08lx\n", + regs->ARM_r10, regs->ARM_r9, + regs->ARM_r8); + printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n", + regs->ARM_r7, regs->ARM_r6, + regs->ARM_r5, regs->ARM_r4); + printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n", + regs->ARM_r3, regs->ARM_r2, + regs->ARM_r1, regs->ARM_r0); + printk("Flags: %c%c%c%c", + flags & PSR_N_BIT ? 'N' : 'n', + flags & PSR_Z_BIT ? 'Z' : 'z', + flags & PSR_C_BIT ? 'C' : 'c', + flags & PSR_V_BIT ? 'V' : 'v'); + printk(" IRQs o%s FIQs o%s Mode %s Segment %s\n", + interrupts_enabled(regs) ? "n" : "ff", + fast_interrupts_enabled(regs) ? "n" : "ff", + processor_modes[processor_mode(regs)], + get_fs() == get_ds() ? "kernel" : "user"); +} + +void show_fpregs(struct user_fp *regs) +{ + int i; + + for (i = 0; i < 8; i++) { + unsigned long *p; + char type; + + p = (unsigned long *)(regs->fpregs + i); + + switch (regs->ftype[i]) { + case 1: type = 'f'; break; + case 2: type = 'd'; break; + case 3: type = 'e'; break; + default: type = '?'; break; + } + if (regs->init_flag) + type = '?'; + + printk(" f%d(%c): %08lx %08lx %08lx%c", + i, type, p[0], p[1], p[2], i & 1 ? '\n' : ' '); + } + + + printk("FPSR: %08lx FPCR: %08lx\n", + (unsigned long)regs->fpsr, + (unsigned long)regs->fpcr); +} + +/* + * Task structure and kernel stack allocation. + */ +static unsigned long *thread_info_head; +static unsigned int nr_thread_info; + +extern unsigned long get_page_8k(int priority); +extern void free_page_8k(unsigned long page); + +// FIXME - is this valid? +#define EXTRA_TASK_STRUCT 0 +#define ll_alloc_task_struct() ((struct thread_info *)get_page_8k(GFP_KERNEL)) +#define ll_free_task_struct(p) free_page_8k((unsigned long)(p)) + +//FIXME - do we use *task param below looks like we dont, which is ok? +//FIXME - if EXTRA_TASK_STRUCT is zero we can optimise the below away permanently. *IF* its supposed to be zero. +struct thread_info *alloc_thread_info(struct task_struct *task) +{ + struct thread_info *thread = NULL; + + if (EXTRA_TASK_STRUCT) { + unsigned long *p = thread_info_head; + + if (p) { + thread_info_head = (unsigned long *)p[0]; + nr_thread_info -= 1; + } + thread = (struct thread_info *)p; + } + + if (!thread) + thread = ll_alloc_task_struct(); + +#ifdef CONFIG_MAGIC_SYSRQ + /* + * The stack must be cleared if you want SYSRQ-T to + * give sensible stack usage information + */ + if (thread) { + char *p = (char *)thread; + memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE); + } +#endif + return thread; +} + +void free_thread_info(struct thread_info *thread) +{ + if (EXTRA_TASK_STRUCT && nr_thread_info < EXTRA_TASK_STRUCT) { + unsigned long *p = (unsigned long *)thread; + p[0] = (unsigned long)thread_info_head; + thread_info_head = p; + nr_thread_info += 1; + } else + ll_free_task_struct(thread); +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ +} + +void flush_thread(void) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = current; + + memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); + memset(&thread->fpstate, 0, sizeof(union fp_state)); + + clear_used_math(); +} + +void release_thread(struct task_struct *dead_task) +{ +} + +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +int +copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, + unsigned long unused, struct task_struct *p, struct pt_regs *regs) +{ + struct thread_info *thread = p->thread_info; + struct pt_regs *childregs; + + childregs = __get_user_regs(thread); + *childregs = *regs; + childregs->ARM_r0 = 0; + childregs->ARM_sp = stack_start; + + memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); + thread->cpu_context.sp = (unsigned long)childregs; + thread->cpu_context.pc = (unsigned long)ret_from_fork | MODE_SVC26 | PSR_I_BIT; + + return 0; +} + +/* + * fill in the fpe structure for a core dump... + */ +int dump_fpu (struct pt_regs *regs, struct user_fp *fp) +{ + struct thread_info *thread = current_thread_info(); + int used_math = !!used_math(); + + if (used_math) + memcpy(fp, &thread->fpstate.soft, sizeof (*fp)); + + return used_math; +} + +/* + * fill in the user structure for a core dump.. + */ +void dump_thread(struct pt_regs * regs, struct user * dump) +{ + struct task_struct *tsk = current; + + dump->magic = CMAGIC; + dump->start_code = tsk->mm->start_code; + dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1); + + dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT; + dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT; + dump->u_ssize = 0; + + dump->u_debugreg[0] = tsk->thread.debug.bp[0].address; + dump->u_debugreg[1] = tsk->thread.debug.bp[1].address; + dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn; + dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn; + dump->u_debugreg[4] = tsk->thread.debug.nsaved; + + if (dump->start_stack < 0x04000000) + dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT; + + dump->regs = *regs; + dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); +} + +/* + * Shuffle the argument into the correct register before calling the + * thread function. r1 is the thread argument, r2 is the pointer to + * the thread function, and r3 points to the exit function. + * FIXME - make sure this is right - the older code used to zero fp + * and cause the parent to call sys_exit (do_exit in this version) + */ +extern void kernel_thread_helper(void); + +asm( ".section .text\n" +" .align\n" +" .type kernel_thread_helper, #function\n" +"kernel_thread_helper:\n" +" mov r0, r1\n" +" mov lr, r3\n" +" mov pc, r2\n" +" .size kernel_thread_helper, . - kernel_thread_helper\n" +" .previous"); + +/* + * Create a kernel thread. + */ +pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.ARM_r1 = (unsigned long)arg; + regs.ARM_r2 = (unsigned long)fn; + regs.ARM_r3 = (unsigned long)do_exit; + regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26; + + return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long fp, lr; + unsigned long stack_page; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + stack_page = 4096 + (unsigned long)p; + fp = thread_saved_fp(p); + do { + if (fp < stack_page || fp > 4092+stack_page) + return 0; + lr = pc_pointer (((unsigned long *)fp)[-1]); + if (!in_sched_functions(lr)) + return lr; + fp = *(unsigned long *) (fp - 12); + } while (count ++ < 16); + return 0; +} diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c new file mode 100644 index 000000000000..2a137146a77c --- /dev/null +++ b/arch/arm26/kernel/ptrace.c @@ -0,0 +1,744 @@ +/* + * linux/arch/arm26/kernel/ptrace.c + * + * By Ross Biro 1/23/92 + * edited by Linus Torvalds + * ARM modifications Copyright (C) 2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/system.h> +//#include <asm/processor.h> + +#include "ptrace.h" + +#define REG_PC 15 +#define REG_PSR 15 +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Breakpoint SWI instruction: SWI &9F0001 + */ +#define BREAKINST_ARM 0xef9f0001 + +/* + * Get the address of the live pt_regs for the specified task. + * These are saved onto the top kernel stack when the process + * is not running. + * + * Note: if a user thread is execve'd from kernel space, the + * kernel stack will not be empty on entry to the kernel, so + * ptracing these tasks will fail. + */ +static inline struct pt_regs * +get_user_regs(struct task_struct *task) +{ + return __get_user_regs(task->thread_info); +} + +/* + * this routine will get a word off of the processes privileged stack. + * the offset is how far from the base addr as stored in the THREAD. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline long get_user_reg(struct task_struct *task, int offset) +{ + return get_user_regs(task)->uregs[offset]; +} + +/* + * this routine will put a word on the processes privileged stack. + * the offset is how far from the base addr as stored in the THREAD. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int +put_user_reg(struct task_struct *task, int offset, long data) +{ + struct pt_regs newregs, *regs = get_user_regs(task); + int ret = -EINVAL; + + newregs = *regs; + newregs.uregs[offset] = data; + + if (valid_user_regs(&newregs)) { + regs->uregs[offset] = data; + ret = 0; + } + + return ret; +} + +static inline int +read_u32(struct task_struct *task, unsigned long addr, u32 *res) +{ + int ret; + + ret = access_process_vm(task, addr, res, sizeof(*res), 0); + + return ret == sizeof(*res) ? 0 : -EIO; +} + +static inline int +read_instr(struct task_struct *task, unsigned long addr, u32 *res) +{ + int ret; + u32 val; + ret = access_process_vm(task, addr & ~3, &val, sizeof(val), 0); + ret = ret == sizeof(val) ? 0 : -EIO; + *res = val; + return ret; +} + +/* + * Get value of register `rn' (in the instruction) + */ +static unsigned long +ptrace_getrn(struct task_struct *child, unsigned long insn) +{ + unsigned int reg = (insn >> 16) & 15; + unsigned long val; + + val = get_user_reg(child, reg); + if (reg == 15) + val = pc_pointer(val + 8); //FIXME - correct for arm26? + + return val; +} + +/* + * Get value of operand 2 (in an ALU instruction) + */ +static unsigned long +ptrace_getaluop2(struct task_struct *child, unsigned long insn) +{ + unsigned long val; + int shift; + int type; + + if (insn & 1 << 25) { + val = insn & 255; + shift = (insn >> 8) & 15; + type = 3; + } else { + val = get_user_reg (child, insn & 15); + + if (insn & (1 << 4)) + shift = (int)get_user_reg (child, (insn >> 8) & 15); + else + shift = (insn >> 7) & 31; + + type = (insn >> 5) & 3; + } + + switch (type) { + case 0: val <<= shift; break; + case 1: val >>= shift; break; + case 2: + val = (((signed long)val) >> shift); + break; + case 3: + val = (val >> shift) | (val << (32 - shift)); + break; + } + return val; +} + +/* + * Get value of operand 2 (in a LDR instruction) + */ +static unsigned long +ptrace_getldrop2(struct task_struct *child, unsigned long insn) +{ + unsigned long val; + int shift; + int type; + + val = get_user_reg(child, insn & 15); + shift = (insn >> 7) & 31; + type = (insn >> 5) & 3; + + switch (type) { + case 0: val <<= shift; break; + case 1: val >>= shift; break; + case 2: + val = (((signed long)val) >> shift); + break; + case 3: + val = (val >> shift) | (val << (32 - shift)); + break; + } + return val; +} + +#define OP_MASK 0x01e00000 +#define OP_AND 0x00000000 +#define OP_EOR 0x00200000 +#define OP_SUB 0x00400000 +#define OP_RSB 0x00600000 +#define OP_ADD 0x00800000 +#define OP_ADC 0x00a00000 +#define OP_SBC 0x00c00000 +#define OP_RSC 0x00e00000 +#define OP_ORR 0x01800000 +#define OP_MOV 0x01a00000 +#define OP_BIC 0x01c00000 +#define OP_MVN 0x01e00000 + +static unsigned long +get_branch_address(struct task_struct *child, unsigned long pc, unsigned long insn) +{ + u32 alt = 0; + + switch (insn & 0x0e000000) { + case 0x00000000: + case 0x02000000: { + /* + * data processing + */ + long aluop1, aluop2, ccbit; + + if ((insn & 0xf000) != 0xf000) + break; + + aluop1 = ptrace_getrn(child, insn); + aluop2 = ptrace_getaluop2(child, insn); + ccbit = get_user_reg(child, REG_PSR) & PSR_C_BIT ? 1 : 0; + + switch (insn & OP_MASK) { + case OP_AND: alt = aluop1 & aluop2; break; + case OP_EOR: alt = aluop1 ^ aluop2; break; + case OP_SUB: alt = aluop1 - aluop2; break; + case OP_RSB: alt = aluop2 - aluop1; break; + case OP_ADD: alt = aluop1 + aluop2; break; + case OP_ADC: alt = aluop1 + aluop2 + ccbit; break; + case OP_SBC: alt = aluop1 - aluop2 + ccbit; break; + case OP_RSC: alt = aluop2 - aluop1 + ccbit; break; + case OP_ORR: alt = aluop1 | aluop2; break; + case OP_MOV: alt = aluop2; break; + case OP_BIC: alt = aluop1 & ~aluop2; break; + case OP_MVN: alt = ~aluop2; break; + } + break; + } + + case 0x04000000: + case 0x06000000: + /* + * ldr + */ + if ((insn & 0x0010f000) == 0x0010f000) { + unsigned long base; + + base = ptrace_getrn(child, insn); + if (insn & 1 << 24) { + long aluop2; + + if (insn & 0x02000000) + aluop2 = ptrace_getldrop2(child, insn); + else + aluop2 = insn & 0xfff; + + if (insn & 1 << 23) + base += aluop2; + else + base -= aluop2; + } + if (read_u32(child, base, &alt) == 0) + alt = pc_pointer(alt); + } + break; + + case 0x08000000: + /* + * ldm + */ + if ((insn & 0x00108000) == 0x00108000) { + unsigned long base; + unsigned int nr_regs; + + if (insn & (1 << 23)) { + nr_regs = hweight16(insn & 65535) << 2; + + if (!(insn & (1 << 24))) + nr_regs -= 4; + } else { + if (insn & (1 << 24)) + nr_regs = -4; + else + nr_regs = 0; + } + + base = ptrace_getrn(child, insn); + + if (read_u32(child, base + nr_regs, &alt) == 0) + alt = pc_pointer(alt); + break; + } + break; + + case 0x0a000000: { + /* + * bl or b + */ + signed long displ; + /* It's a branch/branch link: instead of trying to + * figure out whether the branch will be taken or not, + * we'll put a breakpoint at both locations. This is + * simpler, more reliable, and probably not a whole lot + * slower than the alternative approach of emulating the + * branch. + */ + displ = (insn & 0x00ffffff) << 8; + displ = (displ >> 6) + 8; + if (displ != 0 && displ != 4) + alt = pc + displ; + } + break; + } + + return alt; +} + +static int +swap_insn(struct task_struct *task, unsigned long addr, + void *old_insn, void *new_insn, int size) +{ + int ret; + + ret = access_process_vm(task, addr, old_insn, size, 0); + if (ret == size) + ret = access_process_vm(task, addr, new_insn, size, 1); + return ret; +} + +static void +add_breakpoint(struct task_struct *task, struct debug_info *dbg, unsigned long addr) +{ + int nr = dbg->nsaved; + + if (nr < 2) { + u32 new_insn = BREAKINST_ARM; + int res; + + res = swap_insn(task, addr, &dbg->bp[nr].insn, &new_insn, 4); + + if (res == 4) { + dbg->bp[nr].address = addr; + dbg->nsaved += 1; + } + } else + printk(KERN_ERR "ptrace: too many breakpoints\n"); +} + +/* + * Clear one breakpoint in the user program. We copy what the hardware + * does and use bit 0 of the address to indicate whether this is a Thumb + * breakpoint or an ARM breakpoint. + */ +static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp) +{ + unsigned long addr = bp->address; + u32 old_insn; + int ret; + + ret = swap_insn(task, addr & ~3, &old_insn, + &bp->insn, 4); + + if (ret != 4 || old_insn != BREAKINST_ARM) + printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at " + "0x%08lx (0x%08x)\n", task->comm, task->pid, + addr, old_insn); +} + +void ptrace_set_bpt(struct task_struct *child) +{ + struct pt_regs *regs; + unsigned long pc; + u32 insn; + int res; + + regs = get_user_regs(child); + pc = instruction_pointer(regs); + + res = read_instr(child, pc, &insn); + if (!res) { + struct debug_info *dbg = &child->thread.debug; + unsigned long alt; + + dbg->nsaved = 0; + + alt = get_branch_address(child, pc, insn); + if (alt) + add_breakpoint(child, dbg, alt); + + /* + * Note that we ignore the result of setting the above + * breakpoint since it may fail. When it does, this is + * not so much an error, but a forewarning that we may + * be receiving a prefetch abort shortly. + * + * If we don't set this breakpoint here, then we can + * lose control of the thread during single stepping. + */ + if (!alt || predicate(insn) != PREDICATE_ALWAYS) + add_breakpoint(child, dbg, pc + 4); + } +} + +/* + * Ensure no single-step breakpoint is pending. Returns non-zero + * value if child was being single-stepped. + */ +void ptrace_cancel_bpt(struct task_struct *child) +{ + int i, nsaved = child->thread.debug.nsaved; + + child->thread.debug.nsaved = 0; + + if (nsaved > 2) { + printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved); + nsaved = 2; + } + + for (i = 0; i < nsaved; i++) + clear_breakpoint(child, &child->thread.debug.bp[i]); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + child->ptrace &= ~PT_SINGLESTEP; + ptrace_cancel_bpt(child); +} + +/* + * Handle hitting a breakpoint. + */ +void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) +{ + siginfo_t info; + + /* + * The PC is always left pointing at the next instruction. Fix this. + */ + regs->ARM_pc -= 4; + + if (tsk->thread.debug.nsaved == 0) + printk(KERN_ERR "ptrace: bogus breakpoint trap\n"); + + ptrace_cancel_bpt(tsk); + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void *)instruction_pointer(regs) - 4; + + force_sig_info(SIGTRAP, &info, tsk); +} + +/* + * Read the word at offset "off" into the "struct user". We + * actually access the pt_regs stored on the kernel stack. + */ +static int ptrace_read_user(struct task_struct *tsk, unsigned long off, + unsigned long *ret) +{ + unsigned long tmp; + + if (off & 3 || off >= sizeof(struct user)) + return -EIO; + + tmp = 0; + if (off < sizeof(struct pt_regs)) + tmp = get_user_reg(tsk, off >> 2); + + return put_user(tmp, ret); +} + +/* + * Write the word at offset "off" into "struct user". We + * actually access the pt_regs stored on the kernel stack. + */ +static int ptrace_write_user(struct task_struct *tsk, unsigned long off, + unsigned long val) +{ + if (off & 3 || off >= sizeof(struct user)) + return -EIO; + + if (off >= sizeof(struct pt_regs)) + return 0; + + return put_user_reg(tsk, off >> 2, val); +} + +/* + * Get all user integer registers. + */ +static int ptrace_getregs(struct task_struct *tsk, void *uregs) +{ + struct pt_regs *regs = get_user_regs(tsk); + + return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0; +} + +/* + * Set all user integer registers. + */ +static int ptrace_setregs(struct task_struct *tsk, void *uregs) +{ + struct pt_regs newregs; + int ret; + + ret = -EFAULT; + if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) { + struct pt_regs *regs = get_user_regs(tsk); + + ret = -EINVAL; + if (valid_user_regs(&newregs)) { + *regs = newregs; + ret = 0; + } + } + + return ret; +} + +/* + * Get the child FPU state. + */ +static int ptrace_getfpregs(struct task_struct *tsk, void *ufp) +{ + return copy_to_user(ufp, &tsk->thread_info->fpstate, + sizeof(struct user_fp)) ? -EFAULT : 0; +} + +/* + * Set the child FPU state. + */ +static int ptrace_setfpregs(struct task_struct *tsk, void *ufp) +{ + set_stopped_child_used_math(tsk); + return copy_from_user(&tsk->thread_info->fpstate, ufp, + sizeof(struct user_fp)) ? -EFAULT : 0; +} + +static int do_ptrace(int request, struct task_struct *child, long addr, long data) +{ + unsigned long tmp; + int ret; + + switch (request) { + /* + * read word at location "addr" in the child process. + */ + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = access_process_vm(child, addr, &tmp, + sizeof(unsigned long), 0); + if (ret == sizeof(unsigned long)) + ret = put_user(tmp, (unsigned long *) data); + else + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = ptrace_read_user(child, addr, (unsigned long *)data); + break; + + /* + * write the word at location addr. + */ + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = access_process_vm(child, addr, &data, + sizeof(unsigned long), 1); + if (ret == sizeof(unsigned long)) + ret = 0; + else + ret = -EIO; + break; + + case PTRACE_POKEUSR: + ret = ptrace_write_user(child, addr, data); + break; + + /* + * continue/restart and stop at next (return from) syscall + */ + case PTRACE_SYSCALL: + case PTRACE_CONT: + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* make sure single-step breakpoint is gone. */ + child->ptrace &= ~PT_SINGLESTEP; + ptrace_cancel_bpt(child); + wake_up_process(child); + ret = 0; + break; + + /* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: + /* make sure single-step breakpoint is gone. */ + child->ptrace &= ~PT_SINGLESTEP; + ptrace_cancel_bpt(child); + if (child->exit_state != EXIT_ZOMBIE) { + child->exit_code = SIGKILL; + wake_up_process(child); + } + ret = 0; + break; + + /* + * execute single instruction. + */ + case PTRACE_SINGLESTEP: + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + child->ptrace |= PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + break; + + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + + case PTRACE_GETREGS: + ret = ptrace_getregs(child, (void *)data); + break; + + case PTRACE_SETREGS: + ret = ptrace_setregs(child, (void *)data); + break; + + case PTRACE_GETFPREGS: + ret = ptrace_getfpregs(child, (void *)data); + break; + + case PTRACE_SETFPREGS: + ret = ptrace_setfpregs(child, (void *)data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + int ret; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + ret = security_ptrace(current->parent, current); + if (ret) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret == 0) + ret = do_ptrace(request, child, addr, data); + +out_tsk: + put_task_struct(child); +out: + unlock_kernel(); + return ret; +} + +asmlinkage void syscall_trace(int why, struct pt_regs *regs) +{ + unsigned long ip; + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + + /* + * Save IP. IP is used to denote syscall entry/exit: + * IP = 0 -> entry, = 1 -> exit + */ + ip = regs->ARM_ip; + regs->ARM_ip = why; + + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } + regs->ARM_ip = ip; +} diff --git a/arch/arm26/kernel/ptrace.h b/arch/arm26/kernel/ptrace.h new file mode 100644 index 000000000000..846c9d8d36ed --- /dev/null +++ b/arch/arm26/kernel/ptrace.h @@ -0,0 +1,13 @@ +/* + * linux/arch/arm26/kernel/ptrace.h + * + * Copyright (C) 2000-2003 Russell King + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +extern void ptrace_cancel_bpt(struct task_struct *); +extern void ptrace_set_bpt(struct task_struct *); +extern void ptrace_break(struct task_struct *, struct pt_regs *); diff --git a/arch/arm26/kernel/semaphore.c b/arch/arm26/kernel/semaphore.c new file mode 100644 index 000000000000..3023a53431ff --- /dev/null +++ b/arch/arm26/kernel/semaphore.c @@ -0,0 +1,223 @@ +/* + * ARM semaphore implementation, taken from + * + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * (C) Copyright 2003 Ian Molton (ARM26 mods) + * + * Modified for ARM by Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/init.h> + +#include <asm/semaphore.h> + +/* + * Semaphores are implemented using a two-way counter: + * The "count" variable is decremented for each process + * that tries to acquire the semaphore, while the "sleeping" + * variable is a count of such acquires. + * + * Notably, the inline "up()" and "down()" functions can + * efficiently test if they need to do any extra work (up + * needs to do something only if count was negative before + * the increment operation. + * + * "sleeping" and the contention routine ordering is + * protected by the semaphore spinlock. + * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in <asm/semaphore.h> + * where we want to avoid any extra jumps and calls. + */ + +/* + * Logic: + * - only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - when we go from a non-negative count to a negative do we + * (a) synchronize with the "sleeper" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void __up(struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +static DEFINE_SPINLOCK(semaphore_lock); + +void __sched __down(struct semaphore * sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int __sched __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for + * having decremented the count. + * + * We could have done the trylock with a + * single "cmpxchg" without failure cases, + * but then it wouldn't work on a 386. + */ +int __down_trylock(struct semaphore * sem) +{ + int sleepers; + unsigned long flags; + + spin_lock_irqsave(&semaphore_lock, flags); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irqrestore(&semaphore_lock, flags); + return 1; +} + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * ip contains the semaphore pointer on entry. Save the C-clobbered + * registers (r0 to r3 and lr), but not ip, as we use it as a return + * value in some cases.. + */ +asm(" .section .sched.text , #alloc, #execinstr \n\ + .align 5 \n\ + .globl __down_failed \n\ +__down_failed: \n\ + stmfd sp!, {r0 - r3, lr} \n\ + mov r0, ip \n\ + bl __down \n\ + ldmfd sp!, {r0 - r3, pc}^ \n\ + \n\ + .align 5 \n\ + .globl __down_interruptible_failed \n\ +__down_interruptible_failed: \n\ + stmfd sp!, {r0 - r3, lr} \n\ + mov r0, ip \n\ + bl __down_interruptible \n\ + mov ip, r0 \n\ + ldmfd sp!, {r0 - r3, pc}^ \n\ + \n\ + .align 5 \n\ + .globl __down_trylock_failed \n\ +__down_trylock_failed: \n\ + stmfd sp!, {r0 - r3, lr} \n\ + mov r0, ip \n\ + bl __down_trylock \n\ + mov ip, r0 \n\ + ldmfd sp!, {r0 - r3, pc}^ \n\ + \n\ + .align 5 \n\ + .globl __up_wakeup \n\ +__up_wakeup: \n\ + stmfd sp!, {r0 - r3, lr} \n\ + mov r0, ip \n\ + bl __up \n\ + ldmfd sp!, {r0 - r3, pc}^ \n\ + "); + +EXPORT_SYMBOL(__down_failed); +EXPORT_SYMBOL(__down_interruptible_failed); +EXPORT_SYMBOL(__down_trylock_failed); +EXPORT_SYMBOL(__up_wakeup); + diff --git a/arch/arm26/kernel/setup.c b/arch/arm26/kernel/setup.c new file mode 100644 index 000000000000..4eb329e3828a --- /dev/null +++ b/arch/arm26/kernel/setup.c @@ -0,0 +1,573 @@ +/* + * linux/arch/arm26/kernel/setup.c + * + * Copyright (C) 1995-2001 Russell King + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/utsname.h> +#include <linux/blkdev.h> +#include <linux/console.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/tty.h> +#include <linux/init.h> +#include <linux/root_dev.h> + +#include <asm/elf.h> +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/procinfo.h> +#include <asm/setup.h> +#include <asm/mach-types.h> +#include <asm/tlbflush.h> + +#include <asm/irqchip.h> + +#ifndef MEM_SIZE +#define MEM_SIZE (16*1024*1024) +#endif + +#ifdef CONFIG_PREEMPT +DEFINE_SPINLOCK(kernel_flag); +#endif + +#if defined(CONFIG_FPE_NWFPE) +char fpe_type[8]; + +static int __init fpe_setup(char *line) +{ + memcpy(fpe_type, line, 8); + return 1; +} + +__setup("fpe=", fpe_setup); +#endif + +extern void paging_init(struct meminfo *); +extern void convert_to_tag_list(struct tag *tags); +extern void squash_mem_tags(struct tag *tag); +extern void bootmem_init(struct meminfo *); +extern int root_mountflags; +extern int _stext, _text, _etext, _edata, _end; +#ifdef CONFIG_XIP_KERNEL +extern int _endtext, _sdata; +#endif + + +unsigned int processor_id; +unsigned int __machine_arch_type; +unsigned int system_rev; +unsigned int system_serial_low; +unsigned int system_serial_high; +unsigned int elf_hwcap; +unsigned int memc_ctrl_reg; +unsigned int number_mfm_drives; + +struct processor processor; + +char elf_platform[ELF_PLATFORM_SIZE]; + +unsigned long phys_initrd_start __initdata = 0; +unsigned long phys_initrd_size __initdata = 0; +static struct meminfo meminfo __initdata = { 0, }; +static struct proc_info_item proc_info; +static const char *machine_name; +static char command_line[COMMAND_LINE_SIZE]; + +static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { + { "Video RAM", 0, 0, IORESOURCE_MEM }, + { "Kernel code", 0, 0, IORESOURCE_MEM }, + { "Kernel data", 0, 0, IORESOURCE_MEM } +}; + +#define video_ram mem_res[0] +#define kernel_code mem_res[1] +#define kernel_data mem_res[2] + +static struct resource io_res[] = { + { "reserved", 0x3bc, 0x3be, IORESOURCE_IO | IORESOURCE_BUSY }, + { "reserved", 0x378, 0x37f, IORESOURCE_IO | IORESOURCE_BUSY }, + { "reserved", 0x278, 0x27f, IORESOURCE_IO | IORESOURCE_BUSY } +}; + +#define lp0 io_res[0] +#define lp1 io_res[1] +#define lp2 io_res[2] + +#define dump_cpu_info() do { } while (0) + +static void __init setup_processor(void) +{ + extern struct proc_info_list __proc_info_begin, __proc_info_end; + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm26/mm/proc-*.S + */ + for (list = &__proc_info_begin; list < &__proc_info_end ; list++) + if ((processor_id & list->cpu_mask) == list->cpu_val) + break; + + /* + * If processor type is unrecognised, then we + * can do nothing... + */ + if (list >= &__proc_info_end) { + printk("CPU configuration botched (ID %08x), unable " + "to continue.\n", processor_id); + while (1); + } + + proc_info = *list->info; + processor = *list->proc; + + + printk("CPU: %s %s revision %d\n", + proc_info.manufacturer, proc_info.cpu_name, + (int)processor_id & 15); + + dump_cpu_info(); + + sprintf(system_utsname.machine, "%s", list->arch_name); + sprintf(elf_platform, "%s", list->elf_name); + elf_hwcap = list->elf_hwcap; + + cpu_proc_init(); +} + +/* + * Initial parsing of the command line. We need to pick out the + * memory size. We look for mem=size@start, where start and size + * are "size[KkMm]" + */ +static void __init +parse_cmdline(struct meminfo *mi, char **cmdline_p, char *from) +{ + char c = ' ', *to = command_line; + int usermem = 0, len = 0; + + for (;;) { + if (c == ' ' && !memcmp(from, "mem=", 4)) { + unsigned long size, start; + + if (to != command_line) + to -= 1; + + /* + * If the user specifies memory size, we + * blow away any automatically generated + * size. + */ + if (usermem == 0) { + usermem = 1; + mi->nr_banks = 0; + } + + start = PHYS_OFFSET; + size = memparse(from + 4, &from); + if (*from == '@') + start = memparse(from + 1, &from); + + mi->bank[mi->nr_banks].start = start; + mi->bank[mi->nr_banks].size = size; + mi->bank[mi->nr_banks].node = PHYS_TO_NID(start); + mi->nr_banks += 1; + } + c = *from++; + if (!c) + break; + if (COMMAND_LINE_SIZE <= ++len) + break; + *to++ = c; + } + *to = '\0'; + *cmdline_p = command_line; +} + +static void __init +setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz) +{ +#ifdef CONFIG_BLK_DEV_RAM + extern int rd_size, rd_image_start, rd_prompt, rd_doload; + + rd_image_start = image_start; + rd_prompt = prompt; + rd_doload = doload; + + if (rd_sz) + rd_size = rd_sz; +#endif +} + +static void __init +request_standard_resources(struct meminfo *mi) +{ + struct resource *res; + int i; + + kernel_code.start = init_mm.start_code; + kernel_code.end = init_mm.end_code - 1; +#ifdef CONFIG_XIP_KERNEL + kernel_data.start = init_mm.start_data; +#else + kernel_data.start = init_mm.end_code; +#endif + kernel_data.end = init_mm.brk - 1; + + for (i = 0; i < mi->nr_banks; i++) { + unsigned long virt_start, virt_end; + + if (mi->bank[i].size == 0) + continue; + + virt_start = mi->bank[i].start; + virt_end = virt_start + mi->bank[i].size - 1; + + res = alloc_bootmem_low(sizeof(*res)); + res->name = "System RAM"; + res->start = virt_start; + res->end = virt_end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + request_resource(&iomem_resource, res); + + if (kernel_code.start >= res->start && + kernel_code.end <= res->end) + request_resource(res, &kernel_code); + if (kernel_data.start >= res->start && + kernel_data.end <= res->end) + request_resource(res, &kernel_data); + } + +/* FIXME - needed? if (mdesc->video_start) { + video_ram.start = mdesc->video_start; + video_ram.end = mdesc->video_end; + request_resource(&iomem_resource, &video_ram); + }*/ + + /* + * Some machines don't have the possibility of ever + * possessing lp1 or lp2 + */ + if (0) /* FIXME - need to do this for A5k at least */ + request_resource(&ioport_resource, &lp0); +} + +/* + * Tag parsing. + * + * This is the new way of passing data to the kernel at boot time. Rather + * than passing a fixed inflexible structure to the kernel, we pass a list + * of variable-sized tags to the kernel. The first tag must be a ATAG_CORE + * tag for the list to be recognised (to distinguish the tagged list from + * a param_struct). The list is terminated with a zero-length tag (this tag + * is not parsed in any way). + */ +static int __init parse_tag_core(const struct tag *tag) +{ + if (tag->hdr.size > 2) { + if ((tag->u.core.flags & 1) == 0) + root_mountflags &= ~MS_RDONLY; + ROOT_DEV = old_decode_dev(tag->u.core.rootdev); + } + return 0; +} + +__tagtable(ATAG_CORE, parse_tag_core); + +static int __init parse_tag_mem32(const struct tag *tag) +{ + if (meminfo.nr_banks >= NR_BANKS) { + printk(KERN_WARNING + "Ignoring memory bank 0x%08x size %dKB\n", + tag->u.mem.start, tag->u.mem.size / 1024); + return -EINVAL; + } + meminfo.bank[meminfo.nr_banks].start = tag->u.mem.start; + meminfo.bank[meminfo.nr_banks].size = tag->u.mem.size; + meminfo.bank[meminfo.nr_banks].node = PHYS_TO_NID(tag->u.mem.start); + meminfo.nr_banks += 1; + + return 0; +} + +__tagtable(ATAG_MEM, parse_tag_mem32); + +#if defined(CONFIG_DUMMY_CONSOLE) +struct screen_info screen_info = { + .orig_video_lines = 30, + .orig_video_cols = 80, + .orig_video_mode = 0, + .orig_video_ega_bx = 0, + .orig_video_isVGA = 1, + .orig_video_points = 8 +}; + +static int __init parse_tag_videotext(const struct tag *tag) +{ + screen_info.orig_x = tag->u.videotext.x; + screen_info.orig_y = tag->u.videotext.y; + screen_info.orig_video_page = tag->u.videotext.video_page; + screen_info.orig_video_mode = tag->u.videotext.video_mode; + screen_info.orig_video_cols = tag->u.videotext.video_cols; + screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx; + screen_info.orig_video_lines = tag->u.videotext.video_lines; + screen_info.orig_video_isVGA = tag->u.videotext.video_isvga; + screen_info.orig_video_points = tag->u.videotext.video_points; + return 0; +} + +__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); +#endif + +static int __init parse_tag_acorn(const struct tag *tag) +{ + memc_ctrl_reg = tag->u.acorn.memc_control_reg; + number_mfm_drives = tag->u.acorn.adfsdrives; + return 0; +} + +__tagtable(ATAG_ACORN, parse_tag_acorn); + +static int __init parse_tag_ramdisk(const struct tag *tag) +{ + setup_ramdisk((tag->u.ramdisk.flags & 1) == 0, + (tag->u.ramdisk.flags & 2) == 0, + tag->u.ramdisk.start, tag->u.ramdisk.size); + return 0; +} + +__tagtable(ATAG_RAMDISK, parse_tag_ramdisk); + +static int __init parse_tag_initrd(const struct tag *tag) +{ + printk(KERN_WARNING "ATAG_INITRD is deprecated; please update your bootloader. \n"); + phys_initrd_start = (unsigned long)tag->u.initrd.start; + phys_initrd_size = (unsigned long)tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD, parse_tag_initrd); + +static int __init parse_tag_initrd2(const struct tag *tag) +{ + printk(KERN_WARNING "ATAG_INITRD is deprecated; please update your bootloader. \n"); + phys_initrd_start = (unsigned long)tag->u.initrd.start; + phys_initrd_size = (unsigned long)tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD2, parse_tag_initrd2); + +static int __init parse_tag_serialnr(const struct tag *tag) +{ + system_serial_low = tag->u.serialnr.low; + system_serial_high = tag->u.serialnr.high; + return 0; +} + +__tagtable(ATAG_SERIAL, parse_tag_serialnr); + +static int __init parse_tag_revision(const struct tag *tag) +{ + system_rev = tag->u.revision.rev; + return 0; +} + +__tagtable(ATAG_REVISION, parse_tag_revision); + +static int __init parse_tag_cmdline(const struct tag *tag) +{ + strncpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); + default_command_line[COMMAND_LINE_SIZE - 1] = '\0'; + return 0; +} + +__tagtable(ATAG_CMDLINE, parse_tag_cmdline); + +/* + * Scan the tag table for this tag, and call its parse function. + * The tag table is built by the linker from all the __tagtable + * declarations. + */ +static int __init parse_tag(const struct tag *tag) +{ + extern struct tagtable __tagtable_begin, __tagtable_end; + struct tagtable *t; + + for (t = &__tagtable_begin; t < &__tagtable_end; t++) + if (tag->hdr.tag == t->tag) { + t->parse(tag); + break; + } + + return t < &__tagtable_end; +} + +/* + * Parse all tags in the list, checking both the global and architecture + * specific tag tables. + */ +static void __init parse_tags(const struct tag *t) +{ + for (; t->hdr.size; t = tag_next(t)) + if (!parse_tag(t)) + printk(KERN_WARNING + "Ignoring unrecognised tag 0x%08x\n", + t->hdr.tag); +} + +/* + * This holds our defaults. + */ +static struct init_tags { + struct tag_header hdr1; + struct tag_core core; + struct tag_header hdr2; + struct tag_mem32 mem; + struct tag_header hdr3; +} init_tags __initdata = { + { tag_size(tag_core), ATAG_CORE }, + { 1, PAGE_SIZE, 0xff }, + { tag_size(tag_mem32), ATAG_MEM }, + { MEM_SIZE, PHYS_OFFSET }, + { 0, ATAG_NONE } +}; + +void __init setup_arch(char **cmdline_p) +{ + struct tag *tags = (struct tag *)&init_tags; + char *from = default_command_line; + + setup_processor(); + if(machine_arch_type == MACH_TYPE_A5K) + machine_name = "A5000"; + else if(machine_arch_type == MACH_TYPE_ARCHIMEDES) + machine_name = "Archimedes"; + else + machine_name = "UNKNOWN"; + + //FIXME - the tag struct is always copied here but this is a block + // of RAM that is accidentally reserved along with video RAM. perhaps + // it would be a good idea to explicitly reserve this? + + tags = (struct tag *)0x0207c000; + + /* + * If we have the old style parameters, convert them to + * a tag list. + */ + if (tags->hdr.tag != ATAG_CORE) + convert_to_tag_list(tags); + if (tags->hdr.tag != ATAG_CORE) + tags = (struct tag *)&init_tags; + if (tags->hdr.tag == ATAG_CORE) { + if (meminfo.nr_banks != 0) + squash_mem_tags(tags); + parse_tags(tags); + } + + init_mm.start_code = (unsigned long) &_text; +#ifndef CONFIG_XIP_KERNEL + init_mm.end_code = (unsigned long) &_etext; +#else + init_mm.end_code = (unsigned long) &_endtext; + init_mm.start_data = (unsigned long) &_sdata; +#endif + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + memcpy(saved_command_line, from, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + parse_cmdline(&meminfo, cmdline_p, from); + bootmem_init(&meminfo); + paging_init(&meminfo); + request_standard_resources(&meminfo); + +#ifdef CONFIG_VT +#if defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif +} + +static const char *hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + NULL +}; + +static int c_show(struct seq_file *m, void *v) +{ + int i; + + seq_printf(m, "Processor\t: %s %s rev %d (%s)\n", + proc_info.manufacturer, proc_info.cpu_name, + (int)processor_id & 15, elf_platform); + + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); + + /* dump out the processor features */ + seq_puts(m, "Features\t: "); + + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, "%s ", hwcap_str[i]); + + seq_puts(m, "\n"); + + seq_printf(m, "CPU part\t\t: %07x\n", processor_id >> 4); + seq_printf(m, "CPU revision\t: %d\n\n", processor_id & 15); + seq_printf(m, "Hardware\t: %s\n", machine_name); + seq_printf(m, "Revision\t: %04x\n", system_rev); + seq_printf(m, "Serial\t\t: %08x%08x\n", + system_serial_high, system_serial_low); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; diff --git a/arch/arm26/kernel/signal.c b/arch/arm26/kernel/signal.c new file mode 100644 index 000000000000..356d9809cc0b --- /dev/null +++ b/arch/arm26/kernel/signal.c @@ -0,0 +1,540 @@ +/* + * linux/arch/arm26/kernel/signal.c + * + * Copyright (C) 1995-2002 Russell King + * Copyright (C) 2003 Ian Molton (ARM26) + * + * FIXME!!! This is probably very broken (13/05/2003) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/personality.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/elf.h> + +#include <asm/pgalloc.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +#include "ptrace.h" + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +/* + * For ARM syscalls, we encode the syscall number into the instruction. + */ +#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)) +#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)) + +static int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall); + +/* + * atomically swap in the new signal mask, and wait for a signal. + */ +asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask, struct pt_regs *regs) +{ + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + regs->ARM_r0 = -EINTR; + + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs, 0)) + return regs->ARM_r0; + } +} + +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) +{ + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + regs->ARM_r0 = -EINTR; + + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs, 0)) + return regs->ARM_r0; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +/* + * Do a signal return; undo the signal stack. + */ +struct sigframe +{ + struct sigcontext sc; + unsigned long extramask[_NSIG_WORDS-1]; + unsigned long retcode; +}; + +struct rt_sigframe +{ + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + unsigned long retcode; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) +{ + int err = 0; + + __get_user_error(regs->ARM_r0, &sc->arm_r0, err); + __get_user_error(regs->ARM_r1, &sc->arm_r1, err); + __get_user_error(regs->ARM_r2, &sc->arm_r2, err); + __get_user_error(regs->ARM_r3, &sc->arm_r3, err); + __get_user_error(regs->ARM_r4, &sc->arm_r4, err); + __get_user_error(regs->ARM_r5, &sc->arm_r5, err); + __get_user_error(regs->ARM_r6, &sc->arm_r6, err); + __get_user_error(regs->ARM_r7, &sc->arm_r7, err); + __get_user_error(regs->ARM_r8, &sc->arm_r8, err); + __get_user_error(regs->ARM_r9, &sc->arm_r9, err); + __get_user_error(regs->ARM_r10, &sc->arm_r10, err); + __get_user_error(regs->ARM_fp, &sc->arm_fp, err); + __get_user_error(regs->ARM_ip, &sc->arm_ip, err); + __get_user_error(regs->ARM_sp, &sc->arm_sp, err); + __get_user_error(regs->ARM_lr, &sc->arm_lr, err); + __get_user_error(regs->ARM_pc, &sc->arm_pc, err); + + err |= !valid_user_regs(regs); + + return err; +} + +asmlinkage int sys_sigreturn(struct pt_regs *regs) +{ + struct sigframe *frame; + sigset_t set; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->ARM_sp & 7) + goto badframe; + + frame = (struct sigframe *)regs->ARM_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && __copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext(regs, &frame->sc)) + goto badframe; + + /* Send SIGTRAP if we're single-stepping */ + if (current->ptrace & PT_SINGLESTEP) { + ptrace_cancel_bpt(current); + send_sig(SIGTRAP, current, 1); + } + + return regs->ARM_r0; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe *frame; + sigset_t set; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->ARM_sp & 7) + goto badframe; + + frame = (struct rt_sigframe *)regs->ARM_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + /* Send SIGTRAP if we're single-stepping */ + if (current->ptrace & PT_SINGLESTEP) { + ptrace_cancel_bpt(current); + send_sig(SIGTRAP, current, 1); + } + + return regs->ARM_r0; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +static int +setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/ + struct pt_regs *regs, unsigned long mask) +{ + int err = 0; + + __put_user_error(regs->ARM_r0, &sc->arm_r0, err); + __put_user_error(regs->ARM_r1, &sc->arm_r1, err); + __put_user_error(regs->ARM_r2, &sc->arm_r2, err); + __put_user_error(regs->ARM_r3, &sc->arm_r3, err); + __put_user_error(regs->ARM_r4, &sc->arm_r4, err); + __put_user_error(regs->ARM_r5, &sc->arm_r5, err); + __put_user_error(regs->ARM_r6, &sc->arm_r6, err); + __put_user_error(regs->ARM_r7, &sc->arm_r7, err); + __put_user_error(regs->ARM_r8, &sc->arm_r8, err); + __put_user_error(regs->ARM_r9, &sc->arm_r9, err); + __put_user_error(regs->ARM_r10, &sc->arm_r10, err); + __put_user_error(regs->ARM_fp, &sc->arm_fp, err); + __put_user_error(regs->ARM_ip, &sc->arm_ip, err); + __put_user_error(regs->ARM_sp, &sc->arm_sp, err); + __put_user_error(regs->ARM_lr, &sc->arm_lr, err); + __put_user_error(regs->ARM_pc, &sc->arm_pc, err); + + __put_user_error(current->thread.trap_no, &sc->trap_no, err); + __put_user_error(current->thread.error_code, &sc->error_code, err); + __put_user_error(current->thread.address, &sc->fault_address, err); + __put_user_error(mask, &sc->oldmask, err); + + return err; +} + +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) +{ + unsigned long sp = regs->ARM_sp; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + /* + * ATPCS B01 mandates 8-byte alignment + */ + return (void *)((sp - framesize) & ~7); +} + +static int +setup_return(struct pt_regs *regs, struct k_sigaction *ka, + unsigned long *rc, void *frame, int usig) +{ + unsigned long handler = (unsigned long)ka->sa.sa_handler; + unsigned long retcode; + + if (ka->sa.sa_flags & SA_RESTORER) { + retcode = (unsigned long)ka->sa.sa_restorer; + } else { + + if (__put_user((ka->sa.sa_flags & SA_SIGINFO)?SWI_SYS_RT_SIGRETURN:SWI_SYS_SIGRETURN, rc)) + return 1; + + retcode = ((unsigned long)rc); + } + + regs->ARM_r0 = usig; + regs->ARM_sp = (unsigned long)frame; + regs->ARM_lr = retcode; + regs->ARM_pc = handler & ~3; + + return 0; +} + +static int +setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) +{ + struct sigframe *frame = get_sigframe(ka, regs, sizeof(*frame)); + int err = 0; + + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + return 1; + + err |= setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]); + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + + if (err == 0) + err = setup_return(regs, ka, &frame->retcode, frame, usig); + + return err; +} + +static int +setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe *frame = get_sigframe(ka, regs, sizeof(*frame)); + int err = 0; + + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + return 1; + + __put_user_error(&frame->info, &frame->pinfo, err); + __put_user_error(&frame->uc, &frame->puc, err); + err |= copy_siginfo_to_user(&frame->info, info); + + /* Clear all the bits of the ucontext we don't use. */ + err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + + err |= setup_sigcontext(&frame->uc.uc_mcontext, /*&frame->fpstate,*/ + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err == 0) + err = setup_return(regs, ka, &frame->retcode, frame, usig); + + if (err == 0) { + /* + * For realtime signals we must also set the second and third + * arguments for the signal handler. + * -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06 + */ + regs->ARM_r1 = (unsigned long)frame->pinfo; + regs->ARM_r2 = (unsigned long)frame->puc; + } + + return err; +} + +static inline void restart_syscall(struct pt_regs *regs) +{ + regs->ARM_r0 = regs->ARM_ORIG_r0; + regs->ARM_pc -= 4; +} + +/* + * OK, we're invoking a handler + */ +static void +handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, + struct pt_regs * regs, int syscall) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = current; + struct k_sigaction *ka = &tsk->sighand->action[sig-1]; + int usig = sig; + int ret; + + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall) { + switch (regs->ARM_r0) { + case -ERESTART_RESTARTBLOCK: + current_thread_info()->restart_block.fn = + do_no_restart_syscall; + case -ERESTARTNOHAND: + regs->ARM_r0 = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->ARM_r0 = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + restart_syscall(regs); + } + } + + /* + * translate the signal + */ + if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) + usig = thread->exec_domain->signal_invmap[usig]; + + /* + * Set up the stack frame + */ + if (ka->sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(usig, ka, info, oldset, regs); + else + ret = setup_frame(usig, ka, oldset, regs); + + /* + * Check that the resulting registers are actually sane. + */ + ret |= !valid_user_regs(regs); + + if (ret == 0) { + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(&tsk->sighand->siglock); + sigorsets(&tsk->blocked, &tsk->blocked, + &ka->sa.sa_mask); + sigaddset(&tsk->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + } + return; + } + + force_sigsegv(sig, tsk); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) +{ + siginfo_t info; + int signr; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if (!user_mode(regs)) + return 0; + + if (current->ptrace & PT_SINGLESTEP) + ptrace_cancel_bpt(current); + + signr = get_signal_to_deliver(&info, regs, NULL); + if (signr > 0) { + handle_signal(signr, &info, oldset, regs, syscall); + if (current->ptrace & PT_SINGLESTEP) + ptrace_set_bpt(current); + return 1; + } + + /* + * No signal to deliver to the process - restart the syscall. + */ + if (syscall) { + if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { + u32 *usp; + + regs->ARM_sp -= 12; + usp = (u32 *)regs->ARM_sp; + + put_user(regs->ARM_pc, &usp[0]); + /* swi __NR_restart_syscall */ + put_user(0xef000000 | __NR_restart_syscall, &usp[1]); + /* ldr pc, [sp], #12 */ +// FIXME!!! is #12 correct there? + put_user(0xe49df00c, &usp[2]); + + regs->ARM_pc = regs->ARM_sp + 4; + } + if (regs->ARM_r0 == -ERESTARTNOHAND || + regs->ARM_r0 == -ERESTARTSYS || + regs->ARM_r0 == -ERESTARTNOINTR) { + restart_syscall(regs); + } + } + if (current->ptrace & PT_SINGLESTEP) + ptrace_set_bpt(current); + return 0; +} + +asmlinkage void +do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall) +{ + if (thread_flags & _TIF_SIGPENDING) + do_signal(¤t->blocked, regs, syscall); +} diff --git a/arch/arm26/kernel/sys_arm.c b/arch/arm26/kernel/sys_arm.c new file mode 100644 index 000000000000..e7edd201579a --- /dev/null +++ b/arch/arm26/kernel/sys_arm.c @@ -0,0 +1,324 @@ +/* + * linux/arch/arm26/kernel/sys_arm.c + * + * Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c + * Copyright (C) 1995, 1996 Russell King. + * Copyright (C) 2003 Ian Molton. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/arm + * platform. + */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/utsname.h> + +#include <asm/uaccess.h> +#include <asm/ipc.h> + +extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, + unsigned long new_len, unsigned long flags, + unsigned long new_addr); + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way unix traditionally does this, though. + */ +asmlinkage int sys_pipe(unsigned long * fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, 2*sizeof(int))) + error = -EFAULT; + } + return error; +} + +/* common code for old and new mmaps */ +inline long do_mmap2( + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + int error = -EINVAL; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + + /* + * If we are doing a fixed mapping, and address < PAGE_SIZE, + * then deny it. + */ + if (flags & MAP_FIXED && addr < PAGE_SIZE && vectors_base() == 0) + goto out; + + error = -EBADF; + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down_write(¤t->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + + if (file) + fput(file); +out: + return error; +} + +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +asmlinkage int old_mmap(struct mmap_arg_struct *arg) +{ + int error = -EFAULT; + struct mmap_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + + error = -EINVAL; + if (a.offset & ~PAGE_MASK) + goto out; + + error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); +out: + return error; +} + +asmlinkage unsigned long +sys_arm_mremap(unsigned long addr, unsigned long old_len, + unsigned long new_len, unsigned long flags, + unsigned long new_addr) +{ + unsigned long ret = -EINVAL; + + /* + * If we are doing a fixed mapping, and address < PAGE_SIZE, + * then deny it. + */ + if (flags & MREMAP_FIXED && new_addr < PAGE_SIZE && + vectors_base() == 0) + goto out; + + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); + up_write(¤t->mm->mmap_sem); + +out: + return ret; +} + +/* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. + */ + +struct sel_arg_struct { + unsigned long n; + fd_set *inp, *outp, *exp; + struct timeval *tvp; +}; + +asmlinkage int old_select(struct sel_arg_struct *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + /* sys_select() does the appropriate kernel locking */ + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. + */ +asmlinkage int sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semop (first, (struct sembuf *)ptr, second); + case SEMGET: + return sys_semget (first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void **) ptr)) + return -EFAULT; + return sys_semctl (first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd (first, (struct msgbuf *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + if (copy_from_user(&tmp,(struct ipc_kludge *) ptr, + sizeof (tmp))) + return -EFAULT; + return sys_msgrcv (first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + return sys_msgrcv (first, + (struct msgbuf *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget ((key_t) first, second); + case MSGCTL: + return sys_msgctl (first, second, (struct msqid_ds *) ptr); + + case SHMAT: + switch (version) { + default: { + ulong raddr; + ret = do_shmat (first, (char *) ptr, second, &raddr); + if (ret) + return ret; + return put_user (raddr, (ulong *) third); + } + case 1: /* iBCS2 emulator entry point */ + if (!segment_eq(get_fs(), get_ds())) + return -EINVAL; + return do_shmat (first, (char *) ptr, + second, (ulong *) third); + } + case SHMDT: + return sys_shmdt ((char *)ptr); + case SHMGET: + return sys_shmget (first, second, third); + case SHMCTL: + return sys_shmctl (first, second, + (struct shmid_ds *) ptr); + default: + return -EINVAL; + } +} + +/* Fork a new task - this creates a new program thread. + * This is called indirectly via a small wrapper + */ +asmlinkage int sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL); +} + +/* Clone a task - this clones the calling program thread. + * This is called indirectly via a small wrapper + */ +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs) +{ + /* + * We don't support SETTID / CLEARTID (FIXME!!! (nicked from arm32)) + */ + if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID)) + return -EINVAL; + + if (!newsp) + newsp = regs->ARM_sp; + + return do_fork(clone_flags, newsp, regs, 0, NULL, NULL); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL); +} + +/* sys_execve() executes a new program. + * This is called indirectly via a small wrapper + */ +asmlinkage int sys_execve(char *filenamei, char **argv, char **envp, struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname(filenamei); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + return error; +} + +/* FIXME - see if this is correct for arm26 */ +long execve(const char *filename, char **argv, char **envp) +{ + struct pt_regs regs; + int ret; + memset(®s, 0, sizeof(struct pt_regs)); + ret = do_execve((char *)filename, (char __user * __user *)argv, (char __user * __user *)envp, ®s); + if (ret < 0) + goto out; + + /* + * Save argc to the register structure for userspace. + */ + regs.ARM_r0 = ret; + + /* + * We were successful. We won't be returning to our caller, but + * instead to user space by manipulating the kernel stack. + */ + asm( "add r0, %0, %1\n\t" + "mov r1, %2\n\t" + "mov r2, %3\n\t" + "bl memmove\n\t" /* copy regs to top of stack */ + "mov r8, #0\n\t" /* not a syscall */ + "mov r9, %0\n\t" /* thread structure */ + "mov sp, r0\n\t" /* reposition stack pointer */ + "b ret_to_user" + : + : "r" (current_thread_info()), + "Ir" (THREAD_SIZE - 8 - sizeof(regs)), + "r" (®s), + "Ir" (sizeof(regs)) + : "r0", "r1", "r2", "r3", "ip", "memory"); + + out: + return ret; +} + +EXPORT_SYMBOL(execve); diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c new file mode 100644 index 000000000000..549a6b2e177e --- /dev/null +++ b/arch/arm26/kernel/time.c @@ -0,0 +1,234 @@ +/* + * linux/arch/arm26/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * Modifications for ARM (C) 1994-2001 Russell King + * Mods for ARM26 (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains the ARM-specific time handling details: + * reading the RTC at bootup, etc... + * + * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1998-12-20 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/errno.h> +#include <linux/profile.h> + +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/ioc.h> + +u64 jiffies_64 = INITIAL_JIFFIES; + +EXPORT_SYMBOL(jiffies_64); + +extern unsigned long wall_jiffies; + +/* this needs a better home */ +DEFINE_SPINLOCK(rtc_lock); + +/* change this if you have some constant time drift */ +#define USECS_PER_JIFFY (1000000/HZ) + +static int dummy_set_rtc(void) +{ + return 0; +} + +/* + * hook for setting the RTC's idea of the current time. + */ +int (*set_rtc)(void) = dummy_set_rtc; + +/* + * Get time offset based on IOCs timer. + * FIXME - if this is called with interrutps off, why the shennanigans + * below ? + */ +static unsigned long gettimeoffset(void) +{ + unsigned int count1, count2, status; + long offset; + + ioc_writeb (0, IOC_T0LATCH); + barrier (); + count1 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8); + barrier (); + status = ioc_readb(IOC_IRQREQA); + barrier (); + ioc_writeb (0, IOC_T0LATCH); + barrier (); + count2 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8); + + offset = count2; + if (count2 < count1) { + /* + * We have not had an interrupt between reading count1 + * and count2. + */ + if (status & (1 << 5)) + offset -= LATCH; + } else if (count2 > count1) { + /* + * We have just had another interrupt between reading + * count1 and count2. + */ + offset -= LATCH; + } + + offset = (LATCH - offset) * (tick_nsec / 1000); + return (offset + LATCH/2) / LATCH; +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + +static unsigned long next_rtc_update; + +/* + * If we have an externally synchronized linux clock, then update + * CMOS clock accordingly every ~11 minutes. set_rtc() has to be + * called as close as possible to 500 ms before the new second + * starts. + */ +static inline void do_set_rtc(void) +{ + if (time_status & STA_UNSYNC || set_rtc == NULL) + return; + +//FIXME - timespec.tv_sec is a time_t not unsigned long + if (next_rtc_update && + time_before((unsigned long)xtime.tv_sec, next_rtc_update)) + return; + + if (xtime.tv_nsec < 500000000 - ((unsigned) tick_nsec >> 1) && + xtime.tv_nsec >= 500000000 + ((unsigned) tick_nsec >> 1)) + return; + + if (set_rtc()) + /* + * rtc update failed. Try again in 60s + */ + next_rtc_update = xtime.tv_sec + 60; + else + next_rtc_update = xtime.tv_sec + 660; +} + +#define do_leds() + +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags; + unsigned long seq; + unsigned long usec, sec, lost; + + do { + seq = read_seqbegin_irqsave(&xtime_lock, flags); + usec = gettimeoffset(); + + lost = jiffies - wall_jiffies; + if (lost) + usec += lost * USECS_PER_JIFFY; + + sec = xtime.tv_sec; + usec += xtime.tv_nsec / 1000; + } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + + /* usec may have gone up a lot: be safe */ + while (usec >= 1000000) { + usec -= 1000000; + sec++; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +EXPORT_SYMBOL(do_gettimeofday); + +int do_settimeofday(struct timespec *tv) +{ + if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + return -EINVAL; + + write_seqlock_irq(&xtime_lock); + /* + * This is revolting. We need to set "xtime" correctly. However, the + * value in this location is the value at the most recent update of + * wall time. Discover what correction gettimeofday() would have + * done, and then undo it! + */ + tv->tv_nsec -= 1000 * (gettimeoffset() + + (jiffies - wall_jiffies) * USECS_PER_JIFFY); + + while (tv->tv_nsec < 0) { + tv->tv_nsec += NSEC_PER_SEC; + tv->tv_sec--; + } + + xtime.tv_sec = tv->tv_sec; + xtime.tv_nsec = tv->tv_nsec; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + write_sequnlock_irq(&xtime_lock); + clock_was_set(); + return 0; +} + +EXPORT_SYMBOL(do_settimeofday); + +static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + do_timer(regs); +#ifndef CONFIG_SMP + update_process_times(user_mode(regs)); +#endif + do_set_rtc(); //FIME - EVERY timer IRQ? + profile_tick(CPU_PROFILING, regs); + return IRQ_HANDLED; //FIXME - is this right? +} + +static struct irqaction timer_irq = { + .name = "timer", + .flags = SA_INTERRUPT, + .handler = timer_interrupt, +}; + +extern void ioctime_init(void); + +/* + * Set up timer interrupt. + */ +void __init time_init(void) +{ + ioc_writeb(LATCH & 255, IOC_T0LTCHL); + ioc_writeb(LATCH >> 8, IOC_T0LTCHH); + ioc_writeb(0, IOC_T0GO); + + + setup_irq(IRQ_TIMER, &timer_irq); +} + diff --git a/arch/arm26/kernel/traps.c b/arch/arm26/kernel/traps.c new file mode 100644 index 000000000000..f64f59022392 --- /dev/null +++ b/arch/arm26/kernel/traps.c @@ -0,0 +1,548 @@ +/* + * linux/arch/arm26/kernel/traps.c + * + * Copyright (C) 1995-2002 Russell King + * Fragments that appear the same as linux/arch/i386/kernel/traps.c (C) Linus Torvalds + * Copyright (C) 2003 Ian Molton (ARM26) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 'traps.c' handles hardware exceptions after we have saved some state in + * 'linux/arch/arm26/lib/traps.S'. Mostly a debugging aid, but will probably + * kill the offending process. + */ + +#include <linux/module.h> +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/interrupt.h> +#include <linux/init.h> + +#include <asm/atomic.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/semaphore.h> + +#include "ptrace.h" + +extern void c_backtrace (unsigned long fp, int pmode); +extern void show_pte(struct mm_struct *mm, unsigned long addr); + +const char *processor_modes[] = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" }; + +static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" "*bad reason*"}; + +/* + * Stack pointers should always be within the kernels view of + * physical memory. If it is not there, then we can't dump + * out any information relating to the stack. + */ +static int verify_stack(unsigned long sp) +{ + if (sp < PAGE_OFFSET || (sp > (unsigned long)high_memory && high_memory != 0)) + return -EFAULT; + + return 0; +} + +/* + * Dump out the contents of some memory nicely... + */ +static void dump_mem(const char *str, unsigned long bottom, unsigned long top) +{ + unsigned long p = bottom & ~31; + mm_segment_t fs; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + + printk("%s", str); + printk("(0x%08lx to 0x%08lx)\n", bottom, top); + + for (p = bottom & ~31; p < top;) { + printk("%04lx: ", p & 0xffff); + + for (i = 0; i < 8; i++, p += 4) { + unsigned int val; + + if (p < bottom || p >= top) + printk(" "); + else { + __get_user(val, (unsigned long *)p); + printk("%08x ", val); + } + } + printk ("\n"); + } + + set_fs(fs); +} + +static void dump_instr(struct pt_regs *regs) +{ + unsigned long addr = instruction_pointer(regs); + const int width = 8; + mm_segment_t fs; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + + printk("Code: "); + for (i = -4; i < 1; i++) { + unsigned int val, bad; + + bad = __get_user(val, &((u32 *)addr)[i]); + + if (!bad) + printk(i == 0 ? "(%0*x) " : "%0*x ", width, val); + else { + printk("bad PC value."); + break; + } + } + printk("\n"); + + set_fs(fs); +} + +/*static*/ void __dump_stack(struct task_struct *tsk, unsigned long sp) +{ + dump_mem("Stack: ", sp, 8192+(unsigned long)tsk->thread_info); +} + +void dump_stack(void) +{ +#ifdef CONFIG_DEBUG_ERRORS + __backtrace(); +#endif +} + +EXPORT_SYMBOL(dump_stack); + +//FIXME - was a static fn +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +{ + unsigned int fp; + int ok = 1; + + printk("Backtrace: "); + fp = regs->ARM_fp; + if (!fp) { + printk("no frame pointer"); + ok = 0; + } else if (verify_stack(fp)) { + printk("invalid frame pointer 0x%08x", fp); + ok = 0; + } else if (fp < (unsigned long)(tsk->thread_info + 1)) + printk("frame pointer underflow"); + printk("\n"); + + if (ok) + c_backtrace(fp, processor_mode(regs)); +} + +/* FIXME - this is probably wrong.. */ +void show_stack(struct task_struct *task, unsigned long *sp) { + dump_mem("Stack: ", (unsigned long)sp, 8192+(unsigned long)task->thread_info); +} + +DEFINE_SPINLOCK(die_lock); + +/* + * This function is protected against re-entrancy. + */ +NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) +{ + struct task_struct *tsk = current; + + console_verbose(); + spin_lock_irq(&die_lock); + + printk("Internal error: %s: %x\n", str, err); + printk("CPU: %d\n", smp_processor_id()); + show_regs(regs); + printk("Process %s (pid: %d, stack limit = 0x%p)\n", + current->comm, current->pid, tsk->thread_info + 1); + + if (!user_mode(regs) || in_interrupt()) { + __dump_stack(tsk, (unsigned long)(regs + 1)); + dump_backtrace(regs, tsk); + dump_instr(regs); + } +while(1); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +void die_if_kernel(const char *str, struct pt_regs *regs, int err) +{ + if (user_mode(regs)) + return; + + die(str, regs, err); +} + +static DECLARE_MUTEX(undef_sem); +static int (*undef_hook)(struct pt_regs *); + +int request_undef_hook(int (*fn)(struct pt_regs *)) +{ + int ret = -EBUSY; + + down(&undef_sem); + if (undef_hook == NULL) { + undef_hook = fn; + ret = 0; + } + up(&undef_sem); + + return ret; +} + +int release_undef_hook(int (*fn)(struct pt_regs *)) +{ + int ret = -EINVAL; + + down(&undef_sem); + if (undef_hook == fn) { + undef_hook = NULL; + ret = 0; + } + up(&undef_sem); + + return ret; +} + +static int undefined_extension(struct pt_regs *regs, unsigned int op) +{ + switch (op) { + case 1: /* 0xde01 / 0x?7f001f0 */ + ptrace_break(current, regs); + return 0; + } + return 1; +} + +asmlinkage void do_undefinstr(struct pt_regs *regs) +{ + siginfo_t info; + void *pc; + + regs->ARM_pc -= 4; + + pc = (unsigned long *)instruction_pointer(regs); /* strip PSR */ + + if (user_mode(regs)) { + u32 instr; + + get_user(instr, (u32 *)pc); + + if ((instr & 0x0fff00ff) == 0x07f000f0 && + undefined_extension(regs, (instr >> 8) & 255) == 0) { + regs->ARM_pc += 4; + return; + } + } else { + if (undef_hook && undef_hook(regs) == 0) { + regs->ARM_pc += 4; + return; + } + } + +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", + current->comm, current->pid, pc); + dump_instr(regs); +#endif + + current->thread.error_code = 0; + current->thread.trap_no = 6; + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; + + force_sig_info(SIGILL, &info, current); + + die_if_kernel("Oops - undefined instruction", regs, 0); +} + +asmlinkage void do_excpt(unsigned long address, struct pt_regs *regs, int mode) +{ + siginfo_t info; + +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n", + current->comm, current->pid, instruction_pointer(regs)); + dump_instr(regs); +#endif + + current->thread.error_code = 0; + current->thread.trap_no = 11; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + + force_sig_info(SIGBUS, &info, current); + + die_if_kernel("Oops - address exception", regs, mode); +} + +asmlinkage void do_unexp_fiq (struct pt_regs *regs) +{ +#ifndef CONFIG_IGNORE_FIQ + printk("Hmm. Unexpected FIQ received, but trying to continue\n"); + printk("You may have a hardware problem...\n"); +#endif +} + +/* + * bad_mode handles the impossible case in the vectors. If you see one of + * these, then it's extremely serious, and could mean you have buggy hardware. + * It never returns, and never tries to sync. We hope that we can at least + * dump out some state information... + */ +asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode) +{ + unsigned int vectors = vectors_base(); + + console_verbose(); + + printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n", + handler[reason<5?reason:4], processor_modes[proc_mode]); + + /* + * Dump out the vectors and stub routines. Maybe a better solution + * would be to dump them out only if we detect that they are corrupted. + */ + dump_mem(KERN_CRIT "Vectors: ", vectors, vectors + 0x40); + dump_mem(KERN_CRIT "Stubs: ", vectors + 0x200, vectors + 0x4b8); + + die("Oops", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +static int bad_syscall(int n, struct pt_regs *regs) +{ + struct thread_info *thread = current_thread_info(); + siginfo_t info; + + if (current->personality != PER_LINUX && thread->exec_domain->handler) { + thread->exec_domain->handler(n, regs); + return regs->ARM_r0; + } + +#ifdef CONFIG_DEBUG_USER + printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n", + current->pid, current->comm, n); + dump_instr(regs); +#endif + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLTRP; + info.si_addr = (void *)instruction_pointer(regs) - 4; + + force_sig_info(SIGILL, &info, current); + die_if_kernel("Oops", regs, n); + return regs->ARM_r0; +} + +static inline void +do_cache_op(unsigned long start, unsigned long end, int flags) +{ + struct vm_area_struct *vma; + + if (end < start) + return; + + vma = find_vma(current->active_mm, start); + if (vma && vma->vm_start < end) { + if (start < vma->vm_start) + start = vma->vm_start; + if (end > vma->vm_end) + end = vma->vm_end; + } +} + +/* + * Handle all unrecognised system calls. + * 0x9f0000 - 0x9fffff are some more esoteric system calls + */ +#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) +asmlinkage int arm_syscall(int no, struct pt_regs *regs) +{ + siginfo_t info; + + if ((no >> 16) != 0x9f) + return bad_syscall(no, regs); + + switch (no & 0xffff) { + case 0: /* branch through 0 */ + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = NULL; + + force_sig_info(SIGSEGV, &info, current); + + die_if_kernel("branch through zero", regs, 0); + return 0; + + case NR(breakpoint): /* SWI BREAK_POINT */ + ptrace_break(current, regs); + return regs->ARM_r0; + + case NR(cacheflush): + return 0; + + case NR(usr26): + break; + + default: + /* Calls 9f00xx..9f07ff are defined to return -ENOSYS + if not implemented, rather than raising SIGILL. This + way the calling program can gracefully determine whether + a feature is supported. */ + if (no <= 0x7ff) + return -ENOSYS; + break; + } +#ifdef CONFIG_DEBUG_USER + /* + * experience shows that these seem to indicate that + * something catastrophic has happened + */ + printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no); + dump_instr(regs); + if (user_mode(regs)) { + show_regs(regs); + c_backtrace(regs->ARM_fp, processor_mode(regs)); + } +#endif + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLTRP; + info.si_addr = (void *)instruction_pointer(regs) - 4; + + force_sig_info(SIGILL, &info, current); + die_if_kernel("Oops", regs, no); + return 0; +} + +void __bad_xchg(volatile void *ptr, int size) +{ + printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", + __builtin_return_address(0), ptr, size); + BUG(); +} + +/* + * A data abort trap was taken, but we did not handle the instruction. + * Try to abort the user program, or panic if it was the kernel. + */ +asmlinkage void +baddataabort(int code, unsigned long instr, struct pt_regs *regs) +{ + unsigned long addr = instruction_pointer(regs); + siginfo_t info; + +#ifdef CONFIG_DEBUG_USER + printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n", + current->pid, current->comm, code, instr); + dump_instr(regs); + show_pte(current->mm, addr); +#endif + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = (void *)addr; + + force_sig_info(SIGILL, &info, current); + die_if_kernel("unknown data abort code", regs, instr); +} + +volatile void __bug(const char *file, int line, void *data) +{ + printk(KERN_CRIT"kernel BUG at %s:%d!", file, line); + if (data) + printk(KERN_CRIT" - extra data = %p", data); + printk("\n"); + *(int *)0 = 0; +} + +void __readwrite_bug(const char *fn) +{ + printk("%s called, but not implemented", fn); + BUG(); +} + +void __pte_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pte %08lx.\n", file, line, val); +} + +void __pmd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pmd %08lx.\n", file, line, val); +} + +void __pgd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pgd %08lx.\n", file, line, val); +} + +asmlinkage void __div0(void) +{ + printk("Division by zero in kernel.\n"); + dump_stack(); +} + +void abort(void) +{ + BUG(); + + /* if that doesn't kill us, halt */ + panic("Oops failed to kill thread"); +} + +void __init trap_init(void) +{ + extern void __trap_init(unsigned long); + unsigned long base = vectors_base(); + + __trap_init(base); + if (base != 0) + printk(KERN_DEBUG "Relocating machine vectors to 0x%08lx\n", + base); +} diff --git a/arch/arm26/kernel/vmlinux-arm26-xip.lds.in b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in new file mode 100644 index 000000000000..ca61ec8218fe --- /dev/null +++ b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in @@ -0,0 +1,134 @@ +/* ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * borrowed from Russels ARM port by Ian Molton + */ + +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_ARCH(arm) +ENTRY(stext) +jiffies = jiffies_64; +SECTIONS +{ + . = TEXTADDR; + .init : { /* Init code and data */ + _stext = .; + __init_begin = .; + _sinittext = .; + *(.init.text) + _einittext = .; + __proc_info_begin = .; + *(.proc.info) + __proc_info_end = .; + __arch_info_begin = .; + *(.arch.info) + __arch_info_end = .; + __tagtable_begin = .; + *(.taglist) + __tagtable_end = .; + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + __early_begin = .; + *(__early_param) + __early_end = .; + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + . = ALIGN(32); + __initramfs_start = .; + usr/built-in.o(.init.ramfs) + __initramfs_end = .; + . = ALIGN(32768); + __init_end = .; + } + + /DISCARD/ : { /* Exit code and data */ + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + } + + .text : { /* Real text segment */ + _text = .; /* Text and read-only data */ + *(.text) + SCHED_TEXT + LOCK_TEXT /* FIXME - borrowed from arm32 - check*/ + *(.fixup) + *(.gnu.warning) + *(.rodata) + *(.rodata.*) + *(.glue_7) + *(.glue_7t) + *(.got) /* Global offset table */ + + _etext = .; /* End of text section */ + } + + . = ALIGN(16); + __ex_table : { /* Exception table */ + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + RODATA + + _endtext = .; + + . = DATAADDR; + + _sdata = .; + + .data : { + . = ALIGN(8192); + /* + * first, the init thread union, aligned + * to an 8192 byte boundary. (see arm26/kernel/init_task.c) + * FIXME - sould this be 32K aligned on arm26? + */ + *(.init.task) + + /* + * The cacheline aligned data + */ + . = ALIGN(32); + *(.data.cacheline_aligned) + + /* + * and the usual data section + */ + *(.data) + CONSTRUCTORS + + *(.init.data) + + _edata = .; + } + + .bss : { + __bss_start = .; /* BSS */ + *(.bss) + *(COMMON) + _end = . ; + } + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/arch/arm26/kernel/vmlinux-arm26.lds.in b/arch/arm26/kernel/vmlinux-arm26.lds.in new file mode 100644 index 000000000000..d1d3418d7eb6 --- /dev/null +++ b/arch/arm26/kernel/vmlinux-arm26.lds.in @@ -0,0 +1,127 @@ +/* ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * borrowed from Russels ARM port by Ian Molton and subsequently modified. + */ + +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_ARCH(arm) +ENTRY(stext) +jiffies = jiffies_64; +SECTIONS +{ + . = TEXTADDR; + .init : { /* Init code and data */ + _stext = .; + __init_begin = .; + _sinittext = .; + *(.init.text) + _einittext = .; + __proc_info_begin = .; + *(.proc.info) + __proc_info_end = .; + __arch_info_begin = .; + *(.arch.info) + __arch_info_end = .; + __tagtable_begin = .; + *(.taglist) + __tagtable_end = .; + *(.init.data) + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + __early_begin = .; + *(__early_param) + __early_end = .; + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + . = ALIGN(32); + __initramfs_start = .; + usr/built-in.o(.init.ramfs) + __initramfs_end = .; + . = ALIGN(32768); + __init_end = .; + } + + /DISCARD/ : { /* Exit code and data */ + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + } + + .text : { /* Real text segment */ + _text = .; /* Text and read-only data */ + *(.text) + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + *(.rodata) + *(.rodata.*) + *(.glue_7) + *(.glue_7t) + *(.got) /* Global offset table */ + + _etext = .; /* End of text section */ + } + + . = ALIGN(16); + __ex_table : { /* Exception table */ + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + RODATA + + . = ALIGN(8192); + + .data : { + /* + * first, the init task union, aligned + * to an 8192 byte boundary. (see arm26/kernel/init_task.c) + */ + *(.init.task) + + /* + * The cacheline aligned data + */ + . = ALIGN(32); + *(.data.cacheline_aligned) + + /* + * and the usual data section + */ + *(.data) + CONSTRUCTORS + + _edata = .; + } + + .bss : { + __bss_start = .; /* BSS */ + *(.bss) + *(COMMON) + _end = . ; + } + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/arch/arm26/kernel/vmlinux.lds.S b/arch/arm26/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..811a69048010 --- /dev/null +++ b/arch/arm26/kernel/vmlinux.lds.S @@ -0,0 +1,12 @@ +#include <linux/config.h> + +#ifdef CONFIG_XIP_KERNEL + +#include "vmlinux-arm26-xip.lds.in" + +#else + +#include "vmlinux-arm26.lds.in" + +#endif + diff --git a/arch/arm26/lib/Makefile b/arch/arm26/lib/Makefile new file mode 100644 index 000000000000..6df2b793d367 --- /dev/null +++ b/arch/arm26/lib/Makefile @@ -0,0 +1,26 @@ +# +# linux/arch/arm26/lib/Makefile +# +# Copyright (C) 1995-2000 Russell King +# + +lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ + csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ + copy_page.o delay.o findbit.o memchr.o memcpy.o \ + memset.o memzero.o setbit.o \ + strchr.o strrchr.o testchangebit.o \ + testclearbit.o testsetbit.o getuser.o \ + putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + ucmpdi2.o udivdi3.o lib1funcs.o ecard.o io-acorn.o \ + floppydma.o io-readsb.o io-writesb.o io-writesl.o \ + uaccess-kernel.o uaccess-user.o io-readsw.o \ + io-writesw.o io-readsl.o ecard.o io-acorn.o \ + floppydma.o + +lib-n := + +lib-$(CONFIG_VT)+= kbd.o + +csumpartialcopy.o: csumpartialcopygeneric.S +csumpartialcopyuser.o: csumpartialcopygeneric.S + diff --git a/arch/arm26/lib/ashldi3.c b/arch/arm26/lib/ashldi3.c new file mode 100644 index 000000000000..130f5a839669 --- /dev/null +++ b/arch/arm26/lib/ashldi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__ashldi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + w.s.low = 0; + w.s.high = (USItype)uu.s.low << -bm; + } + else + { + USItype carries = (USItype)uu.s.low >> bm; + w.s.low = (USItype)uu.s.low << b; + w.s.high = ((USItype)uu.s.high << b) | carries; + } + + return w.ll; +} + diff --git a/arch/arm26/lib/ashrdi3.c b/arch/arm26/lib/ashrdi3.c new file mode 100644 index 000000000000..71625d218f8d --- /dev/null +++ b/arch/arm26/lib/ashrdi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__ashrdi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + /* w.s.high = 1..1 or 0..0 */ + w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1); + w.s.low = uu.s.high >> -bm; + } + else + { + USItype carries = (USItype)uu.s.high << bm; + w.s.high = uu.s.high >> b; + w.s.low = ((USItype)uu.s.low >> b) | carries; + } + + return w.ll; +} diff --git a/arch/arm26/lib/backtrace.S b/arch/arm26/lib/backtrace.S new file mode 100644 index 000000000000..d793fe4339fc --- /dev/null +++ b/arch/arm26/lib/backtrace.S @@ -0,0 +1,145 @@ +/* + * linux/arch/arm26/lib/backtrace.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +@ fp is 0 or stack frame + +#define frame r4 +#define next r5 +#define save r6 +#define mask r7 +#define offset r8 + +ENTRY(__backtrace) + mov r1, #0x10 + mov r0, fp + +ENTRY(c_backtrace) + +#ifdef CONFIG_NO_FRAME_POINTER + mov pc, lr +#else + + stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... + mov mask, #0xfc000003 + tst mask, r0 + movne r0, #0 + movs frame, r0 +1: moveq r0, #-2 + LOADREGS(eqfd, sp!, {r4 - r8, pc}) + +2: stmfd sp!, {pc} @ calculate offset of PC in STMIA instruction + ldr r0, [sp], #4 + adr r1, 2b - 4 + sub offset, r0, r1 + +3: tst frame, mask @ Check for address exceptions... + bne 1b + +1001: ldr next, [frame, #-12] @ get fp +1002: ldr r2, [frame, #-4] @ get lr +1003: ldr r3, [frame, #0] @ get pc + sub save, r3, offset @ Correct PC for prefetching + bic save, save, mask +1004: ldr r1, [save, #0] @ get instruction at function + mov r1, r1, lsr #10 + ldr r3, .Ldsi+4 + teq r1, r3 + subeq save, save, #4 + adr r0, .Lfe + mov r1, save + bic r2, r2, mask + bl printk @ print pc and link register + + ldr r0, [frame, #-8] @ get sp + sub r0, r0, #4 +1005: ldr r1, [save, #4] @ get instruction at function+4 + mov r3, r1, lsr #10 + ldr r2, .Ldsi+4 + teq r3, r2 @ Check for stmia sp!, {args} + addeq save, save, #4 @ next instruction + bleq .Ldumpstm + + sub r0, frame, #16 +1006: ldr r1, [save, #4] @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction + mov r3, r1, lsr #10 + ldr r2, .Ldsi + teq r3, r2 + bleq .Ldumpstm + + teq frame, next + movne frame, next + teqne frame, #0 + bne 3b + LOADREGS(fd, sp!, {r4 - r8, pc}) + +/* + * Fixup for LDMDB + */ + .section .fixup,"ax" + .align 0 +1007: ldr r0, =.Lbad + mov r1, frame + bl printk + LOADREGS(fd, sp!, {r4 - r8, pc}) + .ltorg + .previous + + .section __ex_table,"a" + .align 3 + .long 1001b, 1007b + .long 1002b, 1007b + .long 1003b, 1007b + .long 1004b, 1007b + .long 1005b, 1007b + .long 1006b, 1007b + .previous + +#define instr r4 +#define reg r5 +#define stack r6 + +.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} + mov stack, r0 + mov instr, r1 + mov reg, #9 + mov r7, #0 +1: mov r3, #1 + tst instr, r3, lsl reg + beq 2f + add r7, r7, #1 + teq r7, #4 + moveq r7, #0 + moveq r3, #'\n' + movne r3, #' ' + ldr r2, [stack], #-4 + mov r1, reg + adr r0, .Lfp + bl printk +2: subs reg, reg, #1 + bpl 1b + teq r7, #0 + adrne r0, .Lcr + blne printk + mov r0, stack + LOADREGS(fd, sp!, {instr, reg, stack, r7, pc}) + +.Lfe: .asciz "Function entered at [<%p>] from [<%p>]\n" +.Lfp: .asciz " r%d = %08X%c" +.Lcr: .asciz "\n" +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Ldsi: .word 0x00e92dd8 >> 2 + .word 0x00e92d00 >> 2 + +#endif diff --git a/arch/arm26/lib/changebit.S b/arch/arm26/lib/changebit.S new file mode 100644 index 000000000000..1b6a077be5a6 --- /dev/null +++ b/arch/arm26/lib/changebit.S @@ -0,0 +1,28 @@ +/* + * linux/arch/arm26/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* Purpose : Function to change a bit + * Prototype: int change_bit(int bit, void *addr) + */ +ENTRY(_change_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_change_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + eor r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/clearbit.S b/arch/arm26/lib/clearbit.S new file mode 100644 index 000000000000..0a895b0c759f --- /dev/null +++ b/arch/arm26/lib/clearbit.S @@ -0,0 +1,31 @@ +/* + * linux/arch/arm26/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Function to clear a bit + * Prototype: int clear_bit(int bit, void *addr) + */ +ENTRY(_clear_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_clear_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + bic r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm26/lib/copy_page.S b/arch/arm26/lib/copy_page.S new file mode 100644 index 000000000000..2d79ee12ea1f --- /dev/null +++ b/arch/arm26/lib/copy_page.S @@ -0,0 +1,62 @@ +/* + * linux/arch/arm26/lib/copypage.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm_offsets.h> + + .text + .align 5 +/* + * ARMv3 optimised copy_user_page + * + * FIXME: rmk do we need to handle cache stuff... + * FIXME: im is this right on ARM26? + */ +ENTRY(__copy_user_page) + stmfd sp!, {r4, lr} @ 2 + mov r2, #PAGE_SZ/64 @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmneia r1!, {r3, r4, ip, lr} @ 4 + bne 1b @ 1 + LOADREGS(fd, sp!, {r4, pc}) @ 3 + + .align 5 +/* + * ARMv3 optimised clear_user_page + * + * FIXME: rmk do we need to handle cache stuff... + */ +ENTRY(__clear_user_page) + str lr, [sp, #-4]! + mov r1, #PAGE_SZ/64 @ 1 + mov r2, #0 @ 1 + mov r3, #0 @ 1 + mov ip, #0 @ 1 + mov lr, #0 @ 1 +1: stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + stmia r0!, {r2, r3, ip, lr} @ 4 + subs r1, r1, #1 @ 1 + bne 1b @ 1 + ldr pc, [sp], #4 + + .section ".init.text", #alloc, #execinstr + diff --git a/arch/arm26/lib/csumipv6.S b/arch/arm26/lib/csumipv6.S new file mode 100644 index 000000000000..62831155acde --- /dev/null +++ b/arch/arm26/lib/csumipv6.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm26/lib/csumipv6.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +ENTRY(__csum_ipv6_magic) + str lr, [sp, #-4]! + adds ip, r2, r3 + ldmia r1, {r1 - r3, lr} + adcs ip, ip, r1 + adcs ip, ip, r2 + adcs ip, ip, r3 + adcs ip, ip, lr + ldmia r0, {r0 - r3} + adcs r0, ip, r0 + adcs r0, r0, r1 + adcs r0, r0, r2 + ldr r2, [sp, #4] + adcs r0, r0, r3 + adcs r0, r0, r2 + adcs r0, r0, #0 + LOADREGS(fd, sp!, {pc}) + diff --git a/arch/arm26/lib/csumpartial.S b/arch/arm26/lib/csumpartial.S new file mode 100644 index 000000000000..e53e7109e623 --- /dev/null +++ b/arch/arm26/lib/csumpartial.S @@ -0,0 +1,130 @@ +/* + * linux/arch/arm26/lib/csumpartial.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 @ save before use +td2 .req r5 @ save before use +td3 .req lr + +.zero: mov r0, sum + add sp, sp, #4 + ldr pc, [sp], #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.less8: teq len, #0 @ check for zero count + beq .zero + + /* we must have at least one byte. */ + tst buf, #1 @ odd address? + ldrneb td0, [buf], #1 + subne len, len, #1 + adcnes sum, sum, td0, lsl #byte(1) + +.less4: tst len, #6 + beq .less8_byte + + /* we are now half-word aligned */ + +.less8_wordlp: +#if __LINUX_ARM_ARCH__ >= 4 + ldrh td0, [buf], #2 + sub len, len, #2 +#else + ldrb td0, [buf], #1 + ldrb td3, [buf], #1 + sub len, len, #2 + orr td0, td0, td3, lsl #8 +#endif + adcs sum, sum, td0 + tst len, #6 + bne .less8_wordlp + +.less8_byte: tst len, #1 @ odd number of bytes + ldrneb td0, [buf], #1 @ include last byte + adcnes sum, sum, td0, lsl #byte(0) @ update checksum + +.done: adc r0, sum, #0 @ collect up the last carry + ldr td0, [sp], #4 + tst td0, #1 @ check buffer alignment + movne td0, r0, lsl #8 @ rotate checksum by 8 bits + orrne r0, td0, r0, lsr #24 + ldr pc, [sp], #4 @ return + +.not_aligned: tst buf, #1 @ odd address + ldrneb td0, [buf], #1 @ make even + subne len, len, #1 + adcnes sum, sum, td0, lsl #byte(1) @ update checksum + + tst buf, #2 @ 32-bit aligned? +#if __LINUX_ARM_ARCH__ >= 4 + ldrneh td0, [buf], #2 @ make 32-bit aligned + subne len, len, #2 +#else + ldrneb td0, [buf], #1 + ldrneb ip, [buf], #1 + subne len, len, #2 + orrne td0, td0, ip, lsl #8 +#endif + adcnes sum, sum, td0 @ update checksum + mov pc, lr + +ENTRY(csum_partial) + stmfd sp!, {buf, lr} + cmp len, #8 @ Ensure that we have at least + blo .less8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst buf, #3 @ Test destination alignment + blne .not_aligned @ aligh destination, return here + +1: bics ip, len, #31 + beq 3f + + stmfd sp!, {r4 - r5} +2: ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + sub ip, ip, #32 + teq ip, #0 + bne 2b + ldmfd sp!, {r4 - r5} + +3: tst len, #0x1c @ should not change C + beq .less4 + +4: ldr td0, [buf], #4 + sub len, len, #4 + adcs sum, sum, td0 + tst len, #0x1c + bne 4b + b .less4 diff --git a/arch/arm26/lib/csumpartialcopy.S b/arch/arm26/lib/csumpartialcopy.S new file mode 100644 index 000000000000..a1c4b5fdd498 --- /dev/null +++ b/arch/arm26/lib/csumpartialcopy.S @@ -0,0 +1,52 @@ +/* + * linux/arch/arm26/lib/csumpartialcopy.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) + * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum + * Returns : r0 = new checksum + */ + + .macro save_regs + stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} + .endm + + .macro load_regs,flags + LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc}) + .endm + + .macro load1b, reg1 + ldrb \reg1, [r0], #1 + .endm + + .macro load2b, reg1, reg2 + ldrb \reg1, [r0], #1 + ldrb \reg2, [r0], #1 + .endm + + .macro load1l, reg1 + ldr \reg1, [r0], #4 + .endm + + .macro load2l, reg1, reg2 + ldr \reg1, [r0], #4 + ldr \reg2, [r0], #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldmia r0!, {\reg1, \reg2, \reg3, \reg4} + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/arch/arm26/lib/csumpartialcopygeneric.S b/arch/arm26/lib/csumpartialcopygeneric.S new file mode 100644 index 000000000000..5249c3ad11db --- /dev/null +++ b/arch/arm26/lib/csumpartialcopygeneric.S @@ -0,0 +1,352 @@ +/* + * linux/arch/arm26/lib/csumpartialcopygeneric.S + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * JMA 01/06/03 Commented out some shl0s; probobly irrelevant to arm26 + * + */ + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +/* Quick hack */ + .macro save_regs + stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} + .endm + +/* end Quick Hack */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.zero: mov r0, sum + load_regs ea + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.dst_unaligned: tst dst, #1 + beq .dst_16bit + + load1b ip + sub len, len, #1 + adcs sum, sum, ip, lsl #byte(1) @ update checksum + strb ip, [dst], #1 + tst dst, #2 + moveq pc, lr @ dst is now 32bit aligned + +.dst_16bit: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, lsl #byte(0) + strb r8, [dst], #1 + adcs sum, sum, ip, lsl #byte(1) + strb ip, [dst], #1 + mov pc, lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.less8: teq len, #0 @ check for zero count + beq .zero + + /* we must have at least one byte. */ + tst dst, #1 @ dst 16-bit aligned + beq .less8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + adcs sum, sum, ip, lsl #byte(1) @ update checksum + strb ip, [dst], #1 + tst len, #6 + beq .less8_byteonly + +1: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, lsl #byte(0) + strb r8, [dst], #1 + adcs sum, sum, ip, lsl #byte(1) + strb ip, [dst], #1 +.less8_aligned: tst len, #6 + bne 1b +.less8_byteonly: + tst len, #1 + beq .done + load1b r8 + adcs sum, sum, r8, lsl #byte(0) @ update checksum + strb r8, [dst], #1 + b .done + +FN_ENTRY + mov ip, sp + save_regs + sub fp, ip, #4 + + cmp len, #8 @ Ensure that we have at least + blo .less8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst dst, #3 @ Test destination alignment + blne .dst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. + */ + + tst src, #3 @ Test source alignment + bne .src_not_aligned + + /* Routine for src & dst aligned */ + + bics ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + sub ip, ip, #16 + teq ip, #0 + bne 1b + +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r4, r5 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + tst ip, #4 + beq 4f + +3: load1l r4 + str r4, [dst], #4 + adcs sum, sum, r4 + +4: ands len, len, #3 + beq .done + load1l r4 + tst len, #2 +/* mov r5, r4, lsr #byte(0) +FIXME? 0 Shift anyhow! +*/ + beq .exit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, lsr #byte(1) + strb r5, [dst], #1 + mov r5, r4, lsr #byte(2) +.exit: tst len, #1 + strneb r5, [dst], #1 + andne r5, r5, #255 + adcnes sum, sum, r5, lsl #byte(0) + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.done: adc r0, sum, #0 + ldr sum, [sp, #0] @ dst + tst sum, #1 + movne sum, r0, lsl #8 + orrne r0, sum, r0, lsr #24 + load_regs ea + +.src_not_aligned: + adc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + bic src, src, #3 + load1l r5 + cmp ip, #2 + beq .src2_aligned + bhi .src3_aligned + mov r4, r5, pull #8 @ C = 0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + mov r7, r7, pull #8 + orr r7, r7, r8, push #24 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #8 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #8 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #24 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #8 +4: ands len, len, #3 + beq .done +/* mov r5, r4, lsr #byte(0) +FIXME? 0 Shift anyhow +*/ + tst len, #2 + beq .exit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, lsr #byte(1) + strb r5, [dst], #1 + mov r5, r4, lsr #byte(2) + b .exit + +.src2_aligned: mov r4, r5, pull #16 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + mov r7, r7, pull #16 + orr r7, r7, r8, push #16 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #16 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #16 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #16 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #16 +4: ands len, len, #3 + beq .done +/* mov r5, r4, lsr #byte(0) +FIXME? 0 Shift anyhow +*/ + tst len, #2 + beq .exit + adcs sum, sum, r4 + strb r5, [dst], #1 + mov r5, r4, lsr #byte(1) + strb r5, [dst], #1 + tst len, #1 + beq .done + load1b r5 + b .exit + +.src3_aligned: mov r4, r5, pull #24 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + mov r7, r7, pull #24 + orr r7, r7, r8, push #8 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #24 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #24 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #8 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #24 +4: ands len, len, #3 + beq .done +/* mov r5, r4, lsr #byte(0) +FIXME? 0 Shift anyhow +*/ + tst len, #2 + beq .exit + strb r5, [dst], #1 + adcs sum, sum, r4 + load1l r4 +/* mov r5, r4, lsr #byte(0) +FIXME? 0 Shift anyhow +*/ + strb r5, [dst], #1 + adcs sum, sum, r4, push #24 + mov r5, r4, lsr #byte(1) + b .exit diff --git a/arch/arm26/lib/csumpartialcopyuser.S b/arch/arm26/lib/csumpartialcopyuser.S new file mode 100644 index 000000000000..5b821188e479 --- /dev/null +++ b/arch/arm26/lib/csumpartialcopyuser.S @@ -0,0 +1,115 @@ +/* + * linux/arch/arm26/lib/csumpartialcopyuser.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/asm_offsets.h> + + .text + + .macro save_regs + stmfd sp!, {r1 - r2, r4 - r9, fp, ip, lr, pc} + mov r9, sp, lsr #13 + mov r9, r9, lsl #13 + ldr r9, [r9, #TSK_ADDR_LIMIT] + mov r9, r9, lsr #24 + .endm + + .macro load_regs,flags + ldm\flags fp, {r1, r2, r4-r9, fp, sp, pc}^ + .endm + + .macro load1b, reg1 + tst r9, #0x01 +9999: ldreqbt \reg1, [r0], #1 + ldrneb \reg1, [r0], #1 + .section __ex_table, "a" + .align 3 + .long 9999b, 6001f + .previous + .endm + + .macro load2b, reg1, reg2 + tst r9, #0x01 +9999: ldreqbt \reg1, [r0], #1 + ldrneb \reg1, [r0], #1 +9998: ldreqbt \reg2, [r0], #1 + ldrneb \reg2, [r0], #1 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .previous + .endm + + .macro load1l, reg1 + tst r9, #0x01 +9999: ldreqt \reg1, [r0], #4 + ldrne \reg1, [r0], #4 + .section __ex_table, "a" + .align 3 + .long 9999b, 6001f + .previous + .endm + + .macro load2l, reg1, reg2 + tst r9, #0x01 + ldmneia r0!, {\reg1, \reg2} +9999: ldreqt \reg1, [r0], #4 +9998: ldreqt \reg2, [r0], #4 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .previous + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + tst r9, #0x01 + ldmneia r0!, {\reg1, \reg2, \reg3, \reg4} +9999: ldreqt \reg1, [r0], #4 +9998: ldreqt \reg2, [r0], #4 +9997: ldreqt \reg3, [r0], #4 +9996: ldreqt \reg4, [r0], #4 + .section __ex_table, "a" + .long 9999b, 6001f + .long 9998b, 6001f + .long 9997b, 6001f + .long 9996b, 6001f + .previous + .endm + +/* + * unsigned int + * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) + * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr + * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * FIXME: minor buglet here + * We don't return the checksum for the data present in the buffer. To do + * so properly, we would have to add in whatever registers were loaded before + * the fault, which, with the current asm above is not predictable. + */ + .align 4 +6001: mov r4, #-EFAULT + ldr r5, [fp, #4] @ *err_ptr + str r4, [r5] + ldmia sp, {r1, r2} @ retrieve dst, len + add r2, r2, r1 + mov r0, #0 @ zero the buffer +6002: teq r2, r1 + strneb r0, [r1], #1 + bne 6002b + load_regs ea diff --git a/arch/arm26/lib/delay.S b/arch/arm26/lib/delay.S new file mode 100644 index 000000000000..66f2b68e1b13 --- /dev/null +++ b/arch/arm26/lib/delay.S @@ -0,0 +1,57 @@ +/* + * linux/arch/arm26/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +LC0: .word loops_per_jiffy + +/* + * 0 <= r0 <= 2000 + */ +ENTRY(udelay) + mov r2, #0x6800 + orr r2, r2, #0x00db + mul r1, r0, r2 + ldr r2, LC0 + ldr r2, [r2] + mov r1, r1, lsr #11 + mov r2, r2, lsr #11 + mul r0, r1, r2 + movs r0, r0, lsr #6 + RETINSTR(moveq,pc,lr) + +/* + * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32 + * + * Oh, if only we had a cycle counter... + */ + +@ Delay routine +ENTRY(__delay) + subs r0, r0, #1 +#if 0 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 + RETINSTR(movls,pc,lr) + subs r0, r0, #1 +#endif + bhi __delay + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/ecard.S b/arch/arm26/lib/ecard.S new file mode 100644 index 000000000000..b4633150f01c --- /dev/null +++ b/arch/arm26/lib/ecard.S @@ -0,0 +1,41 @@ +/* + * linux/arch/arm26/lib/ecard.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> /* for CONFIG_CPU_nn */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +#define CPSR2SPSR(rt) + +@ Purpose: call an expansion card loader to read bytes. +@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + LOADREGS(fd, sp!, {r4 - r12, pc}) + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + LOADREGS(fd, sp!, {r4 - r12, pc}) + diff --git a/arch/arm26/lib/findbit.S b/arch/arm26/lib/findbit.S new file mode 100644 index 000000000000..26f67cccc37c --- /dev/null +++ b/arch/arm26/lib/findbit.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley <jripley@sonicblue.com> + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: ldrb r3, [r0, r2, lsr #3] + eors r3, r3, #0xff @ invert bits + bne .found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + RETINSTR(mov,pc,lr) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + teq r1, #0 + beq 2b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ldrb r3, [r0, r2, lsr #3] + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.found: tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 + RETINSTR(mov,pc,lr) + diff --git a/arch/arm26/lib/floppydma.S b/arch/arm26/lib/floppydma.S new file mode 100644 index 000000000000..e99ebbb20353 --- /dev/null +++ b/arch/arm26/lib/floppydma.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm26/lib/floppydma.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + + .global floppy_fiqin_end +ENTRY(floppy_fiqin_start) + subs r9, r9, #1 + ldrgtb r12, [r11, #-4] + ldrleb r12, [r11], #0 + strb r12, [r10], #1 + subs pc, lr, #4 +floppy_fiqin_end: + + .global floppy_fiqout_end +ENTRY(floppy_fiqout_start) + subs r9, r9, #1 + ldrgeb r12, [r10], #1 + movlt r12, #0 + strleb r12, [r11], #0 + subles pc, lr, #4 + strb r12, [r11, #-4] + subs pc, lr, #4 +floppy_fiqout_end: diff --git a/arch/arm26/lib/gcclib.h b/arch/arm26/lib/gcclib.h new file mode 100644 index 000000000000..9895e78904b5 --- /dev/null +++ b/arch/arm26/lib/gcclib.h @@ -0,0 +1,21 @@ +/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#define BITS_PER_UNIT 8 +#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT) + +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef int word_type __attribute__ ((mode (__word__))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); + +struct DIstruct {SItype low, high;}; + +typedef union +{ + struct DIstruct s; + DItype ll; +} DIunion; + diff --git a/arch/arm26/lib/getuser.S b/arch/arm26/lib/getuser.S new file mode 100644 index 000000000000..e6d59b334851 --- /dev/null +++ b/arch/arm26/lib/getuser.S @@ -0,0 +1,112 @@ +/* + * linux/arch/arm26/lib/getuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make them more + * efficient, especially as they return an error value in addition to + * the "real" return value. + * + * __get_user_X + * + * Inputs: r0 contains the address + * Outputs: r0 is the error code + * r1, r2 contains the zero-extended value + * lr corrupted + * + * No other registers must be altered. (see include/asm-arm/uaccess.h + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000. + * Note also that it is intended that __get_user_bad is not global. + */ +#include <asm/asm_offsets.h> +#include <asm/thread_info.h> +#include <asm/errno.h> + + .global __get_user_1 +__get_user_1: + bic r1, sp, #0x1f00 + bic r1, r1, #0x00ff + str lr, [sp, #-4]! + ldr r1, [r1, #TI_ADDR_LIMIT] + sub r1, r1, #1 + cmp r0, r1 + bge __get_user_bad + cmp r0, #0x02000000 +1: ldrlsbt r1, [r0] + ldrgeb r1, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __get_user_2 +__get_user_2: + bic r2, sp, #0x1f00 + bic r2, r2, #0x00ff + str lr, [sp, #-4]! + ldr r2, [r2, #TI_ADDR_LIMIT] + sub r2, r2, #2 + cmp r0, r2 + bge __get_user_bad + cmp r0, #0x02000000 +2: ldrlsbt r1, [r0], #1 +3: ldrlsbt r2, [r0] + ldrgeb r1, [r0], #1 + ldrgeb r2, [r0] + orr r1, r1, r2, lsl #8 + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __get_user_4 +__get_user_4: + bic r1, sp, #0x1f00 + bic r1, r1, #0x00ff + str lr, [sp, #-4]! + ldr r1, [r1, #TI_ADDR_LIMIT] + sub r1, r1, #4 + cmp r0, r1 + bge __get_user_bad + cmp r0, #0x02000000 +4: ldrlst r1, [r0] + ldrge r1, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __get_user_8 +__get_user_8: + bic r2, sp, #0x1f00 + bic r2, r2, #0x00ff + str lr, [sp, #-4]! + ldr r2, [r2, #TI_ADDR_LIMIT] + sub r2, r2, #8 + cmp r0, r2 + bge __get_user_bad_8 + cmp r0, #0x02000000 +5: ldrlst r1, [r0], #4 +6: ldrlst r2, [r0] + ldrge r1, [r0], #4 + ldrge r2, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + +__get_user_bad_8: + mov r2, #0 +__get_user_bad: + mov r1, #0 + mov r0, #-EFAULT + ldmfd sp!, {pc}^ + +.section __ex_table, "a" + .long 1b, __get_user_bad + .long 2b, __get_user_bad + .long 3b, __get_user_bad + .long 4b, __get_user_bad + .long 5b, __get_user_bad_8 + .long 6b, __get_user_bad_8 +.previous diff --git a/arch/arm26/lib/io-acorn.S b/arch/arm26/lib/io-acorn.S new file mode 100644 index 000000000000..f6c3e30b1b4f --- /dev/null +++ b/arch/arm26/lib/io-acorn.S @@ -0,0 +1,71 @@ +/* + * linux/arch/arm26/lib/io-acorn.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> /* for CONFIG_CPU_nn */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + + .text + .align + + .equ diff_pcio_base, PCIO_BASE - IO_BASE + + .macro outw2 rd + mov r8, \rd, lsl #16 + orr r8, r8, r8, lsr #16 + str r8, [r3, r0, lsl #2] + mov r8, \rd, lsr #16 + orr r8, r8, r8, lsl #16 + str r8, [r3, r0, lsl #2] + .endm + + .macro inw2 rd, mask, temp + ldr \rd, [r0] + and \rd, \rd, \mask + ldr \temp, [r0] + orr \rd, \rd, \temp, lsl #16 + .endm + + .macro addr rd + tst \rd, #0x80000000 + mov \rd, \rd, lsl #2 + add \rd, \rd, #IO_BASE + addeq \rd, \rd, #diff_pcio_base + .endm + +.iosl_warning: + .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .iosl_warning + mov r1, lr + b printk + +@ Purpose: write a memc register +@ Proto : void memc_write(int register, int value); +@ Returns: nothing + +ENTRY(memc_write) + cmp r0, #7 + RETINSTR(movgt,pc,lr) + mov r0, r0, lsl #17 + mov r1, r1, lsl #15 + mov r1, r1, lsr #17 + orr r0, r0, r1, lsl #2 + add r0, r0, #0x03600000 + strb r0, [r0] + RETINSTR(mov,pc,lr) + diff --git a/arch/arm26/lib/io-readsb.S b/arch/arm26/lib/io-readsb.S new file mode 100644 index 000000000000..4c4d99c05856 --- /dev/null +++ b/arch/arm26/lib/io-readsb.S @@ -0,0 +1,116 @@ +/* + * linux/arch/arm26/lib/io-readsb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.insb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, r2, ip + bne .insb_aligned + +ENTRY(__raw_readsb) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne .insb_align + +.insb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .insb_no_16 + +.insb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + ldrb r5, [r0] + ldrb r6, [r0] + orr r5, r5, r6, lsl #8 + ldrb r6, [r0] + orr r5, r5, r6, lsl #16 + ldrb r6, [r0] + orr r5, r5, r6, lsl #24 + ldrb r6, [r0] + ldrb ip, [r0] + orr r6, r6, ip, lsl #8 + ldrb ip, [r0] + orr r6, r6, ip, lsl #16 + ldrb ip, [r0] + orr r6, r6, ip, lsl #24 + stmia r1!, {r3 - r6} + + subs r2, r2, #16 + bpl .insb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.insb_no_16: tst r2, #8 + beq .insb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + stmia r1!, {r3, r4} + +.insb_no_8: tst r2, #4 + beq .insb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + str r3, [r1], #4 + +.insb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + + LOADREGS(fd, sp!, {r4 - r6, pc}) diff --git a/arch/arm26/lib/io-readsl.S b/arch/arm26/lib/io-readsl.S new file mode 100644 index 000000000000..7be208bd23c6 --- /dev/null +++ b/arch/arm26/lib/io-readsl.S @@ -0,0 +1,78 @@ +/* + * linux/arch/arm26/lib/io-readsl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +/* + * Note that some reads can be aligned on half-word boundaries. + */ +ENTRY(__raw_readsl) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne 2f + +1: ldr r3, [r0] + str r3, [r1], #4 + subs r2, r2, #1 + bne 1b + mov pc, lr + +2: cmp ip, #2 + ldr ip, [r0] + blt 4f + bgt 6f + + strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov ip, ip, lsr #8 +3: subs r2, r2, #1 + ldrne r3, [r0] + orrne ip, ip, r3, lsl #16 + strne ip, [r1], #4 + movne ip, r3, lsr #16 + bne 3b + strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov pc, lr + +4: strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov ip, ip, lsr #8 +5: subs r2, r2, #1 + ldrne r3, [r0] + orrne ip, ip, r3, lsl #8 + strne ip, [r1], #4 + movne ip, r3, lsr #24 + bne 5b + strb ip, [r1], #1 + mov pc, lr + +6: strb ip, [r1], #1 + mov ip, ip, lsr #8 +7: subs r2, r2, #1 + ldrne r3, [r0] + orrne ip, ip, r3, lsl #24 + strne ip, [r1], #4 + movne ip, r3, lsr #8 + bne 7b + strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov ip, ip, lsr #8 + strb ip, [r1], #1 + mov pc, lr + diff --git a/arch/arm26/lib/io-readsw.S b/arch/arm26/lib/io-readsw.S new file mode 100644 index 000000000000..c65c1f28fcff --- /dev/null +++ b/arch/arm26/lib/io-readsw.S @@ -0,0 +1,107 @@ +/* + * linux/arch/arm26/lib/io-readsw.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.insw_bad_alignment: + adr r0, .insw_bad_align_msg + mov r2, lr + b panic +.insw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.insw_align: tst r1, #1 + bne .insw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + RETINSTR(moveq, pc, lr) + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .insw_align + +.insw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_insw_8 + +.insw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .insw_8_lp + + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_insw_8: tst r2, #4 + beq .no_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.no_insw_4: tst r2, #2 + beq .no_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.no_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + + diff --git a/arch/arm26/lib/io-writesb.S b/arch/arm26/lib/io-writesb.S new file mode 100644 index 000000000000..16251b4d5101 --- /dev/null +++ b/arch/arm26/lib/io-writesb.S @@ -0,0 +1,122 @@ +/* + * linux/arch/arm26/lib/io-writesb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.outsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .outsb_aligned + +ENTRY(__raw_writesb) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne .outsb_align + +.outsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .outsb_no_16 + +.outsb_16_lp: ldmia r1!, {r3 - r6} + + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + + subs r2, r2, #16 + bpl .outsb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.outsb_no_16: tst r2, #8 + beq .outsb_no_8 + + ldmia r1!, {r3, r4} + + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + +.outsb_no_8: tst r2, #4 + beq .outsb_no_4 + + ldr r3, [r1], #4 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + +.outsb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + + LOADREGS(fd, sp!, {r4 - r6, pc}) diff --git a/arch/arm26/lib/io-writesl.S b/arch/arm26/lib/io-writesl.S new file mode 100644 index 000000000000..4d6049b16e71 --- /dev/null +++ b/arch/arm26/lib/io-writesl.S @@ -0,0 +1,56 @@ +/* + * linux/arch/arm26/lib/io-writesl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +ENTRY(__raw_writesl) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne 2f + +1: ldr r3, [r1], #4 + str r3, [r0] + subs r2, r2, #1 + bne 1b + mov pc, lr + +2: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1], #4 + bgt 4f + blt 5f + +3: mov ip, r3, lsr #16 + ldr r3, [r1], #4 + orr ip, ip, r3, lsl #16 + str ip, [r0] + subs r2, r2, #1 + bne 3b + mov pc, lr + +4: mov ip, r3, lsr #24 + ldr r3, [r1], #4 + orr ip, ip, r3, lsl #8 + str ip, [r0] + subs r2, r2, #1 + bne 4b + mov pc, lr + +5: mov ip, r3, lsr #8 + ldr r3, [r1], #4 + orr ip, ip, r3, lsl #24 + str ip, [r0] + subs r2, r2, #1 + bne 5b + mov pc, lr + + diff --git a/arch/arm26/lib/io-writesw.S b/arch/arm26/lib/io-writesw.S new file mode 100644 index 000000000000..a24f891f6b1c --- /dev/null +++ b/arch/arm26/lib/io-writesw.S @@ -0,0 +1,127 @@ +/* + * linux/arch/arm26/lib/io-writesw.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> + +.outsw_bad_alignment: + adr r0, .outsw_bad_align_msg + mov r2, lr + b panic +.outsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.outsw_align: tst r1, #1 + bne .outsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + RETINSTR(moveq, pc, lr) + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .outsw_align + +.outsw_aligned: stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_outsw_8 + +.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .outsw_8_lp + + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_outsw_8: tst r2, #4 + beq .no_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_4: tst r2, #2 + beq .no_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) diff --git a/arch/arm26/lib/kbd.c b/arch/arm26/lib/kbd.c new file mode 100644 index 000000000000..22d2c93aaf1a --- /dev/null +++ b/arch/arm26/lib/kbd.c @@ -0,0 +1,279 @@ +#include <linux/config.h> +#include <linux/kd.h> +//#include <linux/kbd_ll.h> +#include <linux/kbd_kern.h> + +/* + * Translation of escaped scancodes to keycodes. + * This is now user-settable. + * The keycodes 1-88,96-111,119 are fairly standard, and + * should probably not be changed - changing might confuse X. + * X also interprets scancode 0x5d (KEY_Begin). + * + * For 1-88 keycode equals scancode. + */ + +#define E0_KPENTER 96 +#define E0_RCTRL 97 +#define E0_KPSLASH 98 +#define E0_PRSCR 99 +#define E0_RALT 100 +#define E0_BREAK 101 /* (control-pause) */ +#define E0_HOME 102 +#define E0_UP 103 +#define E0_PGUP 104 +#define E0_LEFT 105 +#define E0_RIGHT 106 +#define E0_END 107 +#define E0_DOWN 108 +#define E0_PGDN 109 +#define E0_INS 110 +#define E0_DEL 111 + +/* for USB 106 keyboard */ +#define E0_YEN 124 +#define E0_BACKSLASH 89 + + +#define E1_PAUSE 119 + +/* + * The keycodes below are randomly located in 89-95,112-118,120-127. + * They could be thrown away (and all occurrences below replaced by 0), + * but that would force many users to use the `setkeycodes' utility, where + * they needed not before. It does not matter that there are duplicates, as + * long as no duplication occurs for any single keyboard. + */ +#define SC_LIM 89 + +#define FOCUS_PF1 85 /* actual code! */ +#define FOCUS_PF2 89 +#define FOCUS_PF3 90 +#define FOCUS_PF4 91 +#define FOCUS_PF5 92 +#define FOCUS_PF6 93 +#define FOCUS_PF7 94 +#define FOCUS_PF8 95 +#define FOCUS_PF9 120 +#define FOCUS_PF10 121 +#define FOCUS_PF11 122 +#define FOCUS_PF12 123 + +#define JAP_86 124 +/* tfj@olivia.ping.dk: + * The four keys are located over the numeric keypad, and are + * labelled A1-A4. It's an rc930 keyboard, from + * Regnecentralen/RC International, Now ICL. + * Scancodes: 59, 5a, 5b, 5c. + */ +#define RGN1 124 +#define RGN2 125 +#define RGN3 126 +#define RGN4 127 + +static unsigned char high_keys[128 - SC_LIM] = { + RGN1, RGN2, RGN3, RGN4, 0, 0, 0, /* 0x59-0x5f */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 */ + 0, 0, 0, 0, 0, FOCUS_PF11, 0, FOCUS_PF12, /* 0x68-0x6f */ + 0, 0, 0, FOCUS_PF2, FOCUS_PF9, 0, 0, FOCUS_PF3, /* 0x70-0x77 */ + FOCUS_PF4, FOCUS_PF5, FOCUS_PF6, FOCUS_PF7, /* 0x78-0x7b */ + FOCUS_PF8, JAP_86, FOCUS_PF10, 0 /* 0x7c-0x7f */ +}; + +/* BTC */ +#define E0_MACRO 112 +/* LK450 */ +#define E0_F13 113 +#define E0_F14 114 +#define E0_HELP 115 +#define E0_DO 116 +#define E0_F17 117 +#define E0_KPMINPLUS 118 +/* + * My OmniKey generates e0 4c for the "OMNI" key and the + * right alt key does nada. [kkoller@nyx10.cs.du.edu] + */ +#define E0_OK 124 +/* + * New microsoft keyboard is rumoured to have + * e0 5b (left window button), e0 5c (right window button), + * e0 5d (menu button). [or: LBANNER, RBANNER, RMENU] + * [or: Windows_L, Windows_R, TaskMan] + */ +#define E0_MSLW 125 +#define E0_MSRW 126 +#define E0_MSTM 127 + +static unsigned char e0_keys[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x07 */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x08-0x0f */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x17 */ + 0, 0, 0, 0, E0_KPENTER, E0_RCTRL, 0, 0, /* 0x18-0x1f */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x27 */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x28-0x2f */ + 0, 0, 0, 0, 0, E0_KPSLASH, 0, E0_PRSCR, /* 0x30-0x37 */ + E0_RALT, 0, 0, 0, 0, E0_F13, E0_F14, E0_HELP, /* 0x38-0x3f */ + E0_DO, E0_F17, 0, 0, 0, 0, E0_BREAK, E0_HOME, /* 0x40-0x47 */ + E0_UP, E0_PGUP, 0, E0_LEFT, E0_OK, E0_RIGHT, E0_KPMINPLUS, E0_END, /* 0x48-0x4f */ + E0_DOWN, E0_PGDN, E0_INS, E0_DEL, 0, 0, 0, 0, /* 0x50-0x57 */ + 0, 0, 0, E0_MSLW, E0_MSRW, E0_MSTM, 0, 0, /* 0x58-0x5f */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 */ + 0, 0, 0, 0, 0, 0, 0, E0_MACRO, /* 0x68-0x6f */ + //0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 */ + 0, 0, 0, 0, 0, E0_BACKSLASH, 0, 0, /* 0x70-0x77 */ + 0, 0, 0, E0_YEN, 0, 0, 0, 0 /* 0x78-0x7f */ +}; + +static int gen_setkeycode(unsigned int scancode, unsigned int keycode) +{ + if (scancode < SC_LIM || scancode > 255 || keycode > 127) + return -EINVAL; + if (scancode < 128) + high_keys[scancode - SC_LIM] = keycode; + else + e0_keys[scancode - 128] = keycode; + return 0; +} + +static int gen_getkeycode(unsigned int scancode) +{ + return + (scancode < SC_LIM || scancode > 255) ? -EINVAL : + (scancode < + 128) ? high_keys[scancode - SC_LIM] : e0_keys[scancode - 128]; +} + +static int +gen_translate(unsigned char scancode, unsigned char *keycode, char raw_mode) +{ + static int prev_scancode; + + /* special prefix scancodes.. */ + if (scancode == 0xe0 || scancode == 0xe1) { + prev_scancode = scancode; + return 0; + } + + /* 0xFF is sent by a few keyboards, ignore it. 0x00 is error */ + if (scancode == 0x00 || scancode == 0xff) { + prev_scancode = 0; + return 0; + } + + scancode &= 0x7f; + + if (prev_scancode) { + /* + * usually it will be 0xe0, but a Pause key generates + * e1 1d 45 e1 9d c5 when pressed, and nothing when released + */ + if (prev_scancode != 0xe0) { + if (prev_scancode == 0xe1 && scancode == 0x1d) { + prev_scancode = 0x100; + return 0; + } + else if (prev_scancode == 0x100 + && scancode == 0x45) { + *keycode = E1_PAUSE; + prev_scancode = 0; + } else { +#ifdef KBD_REPORT_UNKN + if (!raw_mode) + printk(KERN_INFO + "keyboard: unknown e1 escape sequence\n"); +#endif + prev_scancode = 0; + return 0; + } + } else { + prev_scancode = 0; + /* + * The keyboard maintains its own internal caps lock and + * num lock statuses. In caps lock mode E0 AA precedes make + * code and E0 2A follows break code. In num lock mode, + * E0 2A precedes make code and E0 AA follows break code. + * We do our own book-keeping, so we will just ignore these. + */ + /* + * For my keyboard there is no caps lock mode, but there are + * both Shift-L and Shift-R modes. The former mode generates + * E0 2A / E0 AA pairs, the latter E0 B6 / E0 36 pairs. + * So, we should also ignore the latter. - aeb@cwi.nl + */ + if (scancode == 0x2a || scancode == 0x36) + return 0; + + if (e0_keys[scancode]) + *keycode = e0_keys[scancode]; + else { +#ifdef KBD_REPORT_UNKN + if (!raw_mode) + printk(KERN_INFO + "keyboard: unknown scancode e0 %02x\n", + scancode); +#endif + return 0; + } + } + } else if (scancode >= SC_LIM) { + /* This happens with the FOCUS 9000 keyboard + Its keys PF1..PF12 are reported to generate + 55 73 77 78 79 7a 7b 7c 74 7e 6d 6f + Moreover, unless repeated, they do not generate + key-down events, so we have to zero up_flag below */ + /* Also, Japanese 86/106 keyboards are reported to + generate 0x73 and 0x7d for \ - and \ | respectively. */ + /* Also, some Brazilian keyboard is reported to produce + 0x73 and 0x7e for \ ? and KP-dot, respectively. */ + + *keycode = high_keys[scancode - SC_LIM]; + + if (!*keycode) { + if (!raw_mode) { +#ifdef KBD_REPORT_UNKN + printk(KERN_INFO + "keyboard: unrecognized scancode (%02x)" + " - ignored\n", scancode); +#endif + } + return 0; + } + } else + *keycode = scancode; + return 1; +} + +static char gen_unexpected_up(unsigned char keycode) +{ + /* unexpected, but this can happen: maybe this was a key release for a + FOCUS 9000 PF key; if we want to see it, we have to clear up_flag */ + if (keycode >= SC_LIM || keycode == 85) + return 0; + else + return 0200; +} + +/* + * These are the default mappings + */ +int (*k_setkeycode)(unsigned int, unsigned int) = gen_setkeycode; +int (*k_getkeycode)(unsigned int) = gen_getkeycode; +int (*k_translate)(unsigned char, unsigned char *, char) = gen_translate; +char (*k_unexpected_up)(unsigned char) = gen_unexpected_up; +void (*k_leds)(unsigned char); + +/* Simple translation table for the SysRq keys */ + +#ifdef CONFIG_MAGIC_SYSRQ +static unsigned char gen_sysrq_xlate[128] = + "\000\0331234567890-=\177\t" /* 0x00 - 0x0f */ + "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ + "dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */ + "bnm,./\000*\000 \000\201\202\203\204\205" /* 0x30 - 0x3f */ + "\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */ + "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */ + "\r\000/"; /* 0x60 - 0x6f */ + +unsigned char *k_sysrq_xlate = gen_sysrq_xlate; +int k_sysrq_key = 0x54; +#endif diff --git a/arch/arm26/lib/lib1funcs.S b/arch/arm26/lib/lib1funcs.S new file mode 100644 index 000000000000..b8f9518db871 --- /dev/null +++ b/arch/arm26/lib/lib1funcs.S @@ -0,0 +1,314 @@ +@ libgcc1 routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) + +/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file with other programs, and to distribute +those programs without any restriction coming from the use of this +file. (The General Public License restrictions do apply in other +respects; for example, they cover modification of the file, and +distribution when not linked into another program.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* This code is derived from gcc 2.95.3 */ +/* I Molton 29/07/01 */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware.h> +#include <linux/config.h> + +#define RET movs +#define RETc(x) mov##x##s +#define RETCOND ^ + +dividend .req r0 +divisor .req r1 +result .req r2 +overdone .req r2 +curbit .req r3 +ip .req r12 +sp .req r13 +lr .req r14 +pc .req r15 + +ENTRY(__udivsi3) + cmp divisor, #0 + beq Ldiv0 + mov curbit, #1 + mov result, #0 + cmp dividend, divisor + bcc Lgot_result_udivsi3 +1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc 1b + +2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc 2b + +3: + @ Test for possible subtractions, and note which bits + @ are done in the result. On the final pass, this may subtract + @ too much from the dividend, but the result will be ok, since the + @ "bit" will have been shifted out at the bottom. + cmp dividend, divisor + subcs dividend, dividend, divisor + orrcs result, result, curbit + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs result, result, curbit, lsr #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs result, result, curbit, lsr #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs result, result, curbit, lsr #3 + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne 3b +Lgot_result_udivsi3: + mov r0, result + RET pc, lr + +Ldiv0: + str lr, [sp, #-4]! + bl __div0 + mov r0, #0 @ about as wrong as it could be + ldmia sp!, {pc}RETCOND + +/* __umodsi3 ----------------------- */ + +ENTRY(__umodsi3) + cmp divisor, #0 + beq Ldiv0 + mov curbit, #1 + cmp dividend, divisor + RETc(cc) pc, lr +1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc 1b + +2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc 2b + +3: + @ Test for possible subtractions. On the final pass, this may + @ subtract too much from the dividend, so keep track of which + @ subtractions are done, we can fix them up afterwards... + mov overdone, #0 + cmp dividend, divisor + subcs dividend, dividend, divisor + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs overdone, overdone, curbit, ror #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs overdone, overdone, curbit, ror #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs overdone, overdone, curbit, ror #3 + mov ip, curbit + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne 3b + + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + @ If we terminated early, because dividend became zero, + @ then none of the below will match, since the bit in ip will not be + @ in the bottom nibble. + ands overdone, overdone, #0xe0000000 + RETc(eq) pc, lr @ No fixups needed + tst overdone, ip, ror #3 + addne dividend, dividend, divisor, lsr #3 + tst overdone, ip, ror #2 + addne dividend, dividend, divisor, lsr #2 + tst overdone, ip, ror #1 + addne dividend, dividend, divisor, lsr #1 + RET pc, lr + +ENTRY(__divsi3) + eor ip, dividend, divisor @ Save the sign of the result. + mov curbit, #1 + mov result, #0 + cmp divisor, #0 + rsbmi divisor, divisor, #0 @ Loops below use unsigned. + beq Ldiv0 + cmp dividend, #0 + rsbmi dividend, dividend, #0 + cmp dividend, divisor + bcc Lgot_result_divsi3 + +1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc 1b + +2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc 2b + +3: + @ Test for possible subtractions, and note which bits + @ are done in the result. On the final pass, this may subtract + @ too much from the dividend, but the result will be ok, since the + @ "bit" will have been shifted out at the bottom. + cmp dividend, divisor + subcs dividend, dividend, divisor + orrcs result, result, curbit + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs result, result, curbit, lsr #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs result, result, curbit, lsr #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs result, result, curbit, lsr #3 + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne 3b +Lgot_result_divsi3: + mov r0, result + cmp ip, #0 + rsbmi r0, r0, #0 + RET pc, lr + +ENTRY(__modsi3) + mov curbit, #1 + cmp divisor, #0 + rsbmi divisor, divisor, #0 @ Loops below use unsigned. + beq Ldiv0 + @ Need to save the sign of the dividend, unfortunately, we need + @ ip later on; this is faster than pushing lr and using that. + str dividend, [sp, #-4]! + cmp dividend, #0 + rsbmi dividend, dividend, #0 + cmp dividend, divisor + bcc Lgot_result_modsi3 + +1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc 1b + +2: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc 2b + +3: + @ Test for possible subtractions. On the final pass, this may + @ subtract too much from the dividend, so keep track of which + @ subtractions are done, we can fix them up afterwards... + mov overdone, #0 + cmp dividend, divisor + subcs dividend, dividend, divisor + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs overdone, overdone, curbit, ror #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs overdone, overdone, curbit, ror #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs overdone, overdone, curbit, ror #3 + mov ip, curbit + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne 3b + + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + @ If we terminated early, because dividend became zero, + @ then none of the below will match, since the bit in ip will not be + @ in the bottom nibble. + ands overdone, overdone, #0xe0000000 + beq Lgot_result_modsi3 + tst overdone, ip, ror #3 + addne dividend, dividend, divisor, lsr #3 + tst overdone, ip, ror #2 + addne dividend, dividend, divisor, lsr #2 + tst overdone, ip, ror #1 + addne dividend, dividend, divisor, lsr #1 +Lgot_result_modsi3: + ldr ip, [sp], #4 + cmp ip, #0 + rsbmi dividend, dividend, #0 + RET pc, lr diff --git a/arch/arm26/lib/longlong.h b/arch/arm26/lib/longlong.h new file mode 100644 index 000000000000..05ec1abd6a2c --- /dev/null +++ b/arch/arm26/lib/longlong.h @@ -0,0 +1,184 @@ +/* longlong.h -- based on code from gcc-2.95.3 + + definitions for mixed size 32/64 bit arithmetic. + Copyright (C) 1991, 92, 94, 95, 96, 1997, 1998 Free Software Foundation, Inc. + + This definition file is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2, or (at your option) any later version. + + This definition file is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Borrowed from GCC 2.95.3, I Molton 29/07/01 */ + +#ifndef SI_TYPE_SIZE +#define SI_TYPE_SIZE 32 +#endif + +#define __BITS4 (SI_TYPE_SIZE / 4) +#define __ll_B (1L << (SI_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((USItype) (t) % __ll_B) +#define __ll_highpart(t) ((USItype) (t) / __ll_B) + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) + multiplies two USItype integers MULTIPLER and MULTIPLICAND, + and generates a two-part USItype product in HIGH_PROD and + LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two USItype integers A and B, + and returns a UDItype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a two-word unsigned integer, composed by the + integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and + places the quotient in QUOTIENT and the remainder in REMAINDER. + HIGH_NUMERATOR must be less than DENOMINATOR for correct operation. + If, in addition, the most significant bit of DENOMINATOR must be 1, + then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The + quotient is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from + the msb to the first non-zero bit. This is the number of steps X + needs to be shifted left to set the msb. Undefined for X == 0. + + 6) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two two-word unsigned integers, + composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and + LOW_ADDEND_2 respectively. The result is placed in HIGH_SUM and + LOW_SUM. Overflow (i.e. carry out) is not stored anywhere, and is + lost. + + 7) sub_ddmmss(high_difference, low_difference, high_minuend, + low_minuend, high_subtrahend, low_subtrahend) subtracts two + two-word unsigned integers, composed by HIGH_MINUEND_1 and + LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and LOW_SUBTRAHEND_2 + respectively. The result is placed in HIGH_DIFFERENCE and + LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +#if defined (__arm__) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5 \n\ + adc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "%r" ((USItype) (al)), \ + "rI" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5 \n\ + sbc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rI" ((USItype) (bl))) +#define umul_ppmm(xh, xl, a, b) \ +{register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm \n\ + mov %2, %5, lsr #16 \n\ + mov %0, %6, lsr #16 \n\ + bic %3, %5, %2, lsl #16 \n\ + bic %4, %6, %0, lsl #16 \n\ + mul %1, %3, %4 \n\ + mul %4, %2, %4 \n\ + mul %3, %0, %3 \n\ + mul %0, %2, %0 \n\ + adds %3, %4, %3 \n\ + addcs %0, %0, #65536 \n\ + adds %1, %1, %3, lsl #16 \n\ + adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), \ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ + "r" ((USItype) (b)));} +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +#define __umulsidi3(u, v) \ + ({DIunion __w; \ + umul_ppmm (__w.s.high, __w.s.low, u, v); \ + __w.ll; }) + +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + USItype __d1, __d0, __q1, __q0; \ + USItype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (USItype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (USItype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (USItype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c + +extern const UQItype __clz_tab[]; +#define count_leading_zeros(count, x) \ + do { \ + USItype __xr = (x); \ + USItype __a; \ + \ + if (SI_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((USItype)1<<2*__BITS4) \ + ? (__xr < ((USItype)1<<__BITS4) ? 0 : __BITS4) \ + : (__xr < ((USItype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ + } \ + else \ + { \ + for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) diff --git a/arch/arm26/lib/lshrdi3.c b/arch/arm26/lib/lshrdi3.c new file mode 100644 index 000000000000..b666f1bad451 --- /dev/null +++ b/arch/arm26/lib/lshrdi3.c @@ -0,0 +1,61 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +DItype +__lshrdi3 (DItype u, word_type b) +{ + DIunion w; + word_type bm; + DIunion uu; + + if (b == 0) + return u; + + uu.ll = u; + + bm = (sizeof (SItype) * BITS_PER_UNIT) - b; + if (bm <= 0) + { + w.s.high = 0; + w.s.low = (USItype)uu.s.high >> -bm; + } + else + { + USItype carries = (USItype)uu.s.high << bm; + w.s.high = (USItype)uu.s.high >> b; + w.s.low = ((USItype)uu.s.low >> b) | carries; + } + + return w.ll; +} + diff --git a/arch/arm26/lib/memchr.S b/arch/arm26/lib/memchr.S new file mode 100644 index 000000000000..34e7c14c08ad --- /dev/null +++ b/arch/arm26/lib/memchr.S @@ -0,0 +1,25 @@ +/* + * linux/arch/arm26/lib/memchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(memchr) +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r0], #1 + teq r3, r1 + bne 1b + sub r0, r0, #1 +2: movne r0, #0 + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/memcpy.S b/arch/arm26/lib/memcpy.S new file mode 100644 index 000000000000..3f719e412069 --- /dev/null +++ b/arch/arm26/lib/memcpy.S @@ -0,0 +1,318 @@ +/* + * linux/arch/arm26/lib/memcpy.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +#define ENTER \ + mov ip,sp ;\ + stmfd sp!,{r4-r9,fp,ip,lr,pc} ;\ + sub fp,ip,#4 + +#define EXIT \ + LOADREGS(ea, fp, {r4 - r9, fp, sp, pc}) + +#define EXITEQ \ + LOADREGS(eqea, fp, {r4 - r9, fp, sp, pc}) + +/* + * Prototype: void memcpy(void *to,const void *from,unsigned long n); + * ARM3: cant use memcopy here!!! + */ +ENTRY(memcpy) +ENTRY(memmove) + ENTER + cmp r1, r0 + bcc 19f + subs r2, r2, #4 + blt 6f + ands ip, r0, #3 + bne 7f + ands ip, r1, #3 + bne 8f + +1: subs r2, r2, #8 + blt 5f + subs r2, r2, #0x14 + blt 3f +2: ldmia r1!,{r3 - r9, ip} + stmia r0!,{r3 - r9, ip} + subs r2, r2, #32 + bge 2b + cmn r2, #16 + ldmgeia r1!, {r3 - r6} + stmgeia r0!, {r3 - r6} + subge r2, r2, #0x10 +3: adds r2, r2, #0x14 +4: ldmgeia r1!, {r3 - r5} + stmgeia r0!, {r3 - r5} + subges r2, r2, #12 + bge 4b +5: adds r2, r2, #8 + blt 6f + subs r2, r2, #4 + ldrlt r3, [r1], #4 + ldmgeia r1!, {r4, r5} + strlt r3, [r0], #4 + stmgeia r0!, {r4, r5} + subge r2, r2, #4 + +6: adds r2, r2, #4 + EXITEQ + cmp r2, #2 + ldrb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrgtb r5, [r1], #1 + strb r3, [r0], #1 + strgeb r4, [r0], #1 + strgtb r5, [r0], #1 + EXIT + +7: rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrgtb r5, [r1], #1 + strb r3, [r0], #1 + strgeb r4, [r0], #1 + strgtb r5, [r0], #1 + subs r2, r2, ip + blt 6b + ands ip, r1, #3 + beq 1b + +8: bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt 15f + beq 11f + cmp r2, #12 + blt 10f + sub r2, r2, #12 +9: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} + subs r2, r2, #16 + bge 9b + adds r2, r2, #12 + blt 100f +10: mov r3, r7, pull #8 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #24 + str r3, [r0], #4 + bge 10b +100: sub r1, r1, #3 + b 6b + +11: cmp r2, #12 + blt 13f /* */ + sub r2, r2, #12 +12: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} + subs r2, r2, #16 + bge 12b + adds r2, r2, #12 + blt 14f +13: mov r3, r7, pull #16 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #16 + str r3, [r0], #4 + bge 13b +14: sub r1, r1, #2 + b 6b + +15: cmp r2, #12 + blt 17f + sub r2, r2, #12 +16: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} + subs r2, r2, #16 + bge 16b + adds r2, r2, #12 + blt 18f +17: mov r3, r7, pull #24 + ldr r7, [r1], #4 + subs r2, r2, #4 + orr r3, r3, r7, push #8 + str r3, [r0], #4 + bge 17b +18: sub r1, r1, #1 + b 6b + + +19: add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 24f + ands ip, r0, #3 + bne 25f + ands ip, r1, #3 + bne 26f + +20: subs r2, r2, #8 + blt 23f + subs r2, r2, #0x14 + blt 22f +21: ldmdb r1!, {r3 - r9, ip} + stmdb r0!, {r3 - r9, ip} + subs r2, r2, #32 + bge 21b +22: cmn r2, #16 + ldmgedb r1!, {r3 - r6} + stmgedb r0!, {r3 - r6} + subge r2, r2, #16 + adds r2, r2, #20 + ldmgedb r1!, {r3 - r5} + stmgedb r0!, {r3 - r5} + subge r2, r2, #12 +23: adds r2, r2, #8 + blt 24f + subs r2, r2, #4 + ldrlt r3, [r1, #-4]! + ldmgedb r1!, {r4, r5} + strlt r3, [r0, #-4]! + stmgedb r0!, {r4, r5} + subge r2, r2, #4 + +24: adds r2, r2, #4 + EXITEQ + cmp r2, #2 + ldrb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrgtb r5, [r1, #-1]! + strb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + strgtb r5, [r0, #-1]! + EXIT + +25: cmp ip, #2 + ldrb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrgtb r5, [r1, #-1]! + strb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + strgtb r5, [r0, #-1]! + subs r2, r2, ip + blt 24b + ands ip, r1, #3 + beq 20b + +26: bic r1, r1, #3 + ldr r3, [r1], #0 + cmp ip, #2 + blt 34f + beq 30f + cmp r2, #12 + blt 28f + sub r2, r2, #12 +27: mov r7, r3, push #8 + ldmdb r1!, {r3, r4, r5, r6} + orr r7, r7, r6, pull #24 + mov r6, r6, push #8 + orr r6, r6, r5, pull #24 + mov r5, r5, push #8 + orr r5, r5, r4, pull #24 + mov r4, r4, push #8 + orr r4, r4, r3, pull #24 + stmdb r0!, {r4, r5, r6, r7} + subs r2, r2, #16 + bge 27b + adds r2, r2, #12 + blt 29f +28: mov ip, r3, push #8 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #24 + str ip, [r0, #-4]! + bge 28b +29: add r1, r1, #3 + b 24b + +30: cmp r2, #12 + blt 32f + sub r2, r2, #12 +31: mov r7, r3, push #16 + ldmdb r1!, {r3, r4, r5, r6} + orr r7, r7, r6, pull #16 + mov r6, r6, push #16 + orr r6, r6, r5, pull #16 + mov r5, r5, push #16 + orr r5, r5, r4, pull #16 + mov r4, r4, push #16 + orr r4, r4, r3, pull #16 + stmdb r0!, {r4, r5, r6, r7} + subs r2, r2, #16 + bge 31b + adds r2, r2, #12 + blt 33f +32: mov ip, r3, push #16 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #16 + str ip, [r0, #-4]! + bge 32b +33: add r1, r1, #2 + b 24b + +34: cmp r2, #12 + blt 36f + sub r2, r2, #12 +35: mov r7, r3, push #24 + ldmdb r1!, {r3, r4, r5, r6} + orr r7, r7, r6, pull #8 + mov r6, r6, push #24 + orr r6, r6, r5, pull #8 + mov r5, r5, push #24 + orr r5, r5, r4, pull #8 + mov r4, r4, push #24 + orr r4, r4, r3, pull #8 + stmdb r0!, {r4, r5, r6, r7} + subs r2, r2, #16 + bge 35b + adds r2, r2, #12 + blt 37f +36: mov ip, r3, push #24 + ldr r3, [r1, #-4]! + subs r2, r2, #4 + orr ip, ip, r3, pull #8 + str ip, [r0, #-4]! + bge 36b +37: add r1, r1, #1 + b 24b + + .align diff --git a/arch/arm26/lib/memset.S b/arch/arm26/lib/memset.S new file mode 100644 index 000000000000..aedec10b58f5 --- /dev/null +++ b/arch/arm26/lib/memset.S @@ -0,0 +1,80 @@ +/* + * linux/arch/arm26/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 + +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(memset) + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * we know that the pointer in r0 is aligned to a word boundary. + */ + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 + +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + LOADREGS(eqfd, sp!, {pc}) @ Now <64 bytes to go. +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 + +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/memzero.S b/arch/arm26/lib/memzero.S new file mode 100644 index 000000000000..cc5bf6860061 --- /dev/null +++ b/arch/arm26/lib/memzero.S @@ -0,0 +1,80 @@ +/* + * linux/arch/arm26/lib/memzero.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 +/* + * Align the pointer in r0. r3 contains the number of bytes that we are + * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we + * don't bother; we use byte stores instead. + */ +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(__memzero) + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. + */ + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 + +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + LOADREGS(eqfd, sp!, {pc}) @ 1/2 quick exit +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 + +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + RETINSTR(mov,pc,lr) @ 1 diff --git a/arch/arm26/lib/muldi3.c b/arch/arm26/lib/muldi3.c new file mode 100644 index 000000000000..44d611b1cfdb --- /dev/null +++ b/arch/arm26/lib/muldi3.c @@ -0,0 +1,77 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +#define umul_ppmm(xh, xl, a, b) \ +{register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm \n\ + mov %2, %5, lsr #16 \n\ + mov %0, %6, lsr #16 \n\ + bic %3, %5, %2, lsl #16 \n\ + bic %4, %6, %0, lsl #16 \n\ + mul %1, %3, %4 \n\ + mul %4, %2, %4 \n\ + mul %3, %0, %3 \n\ + mul %0, %2, %0 \n\ + adds %3, %4, %3 \n\ + addcs %0, %0, #65536 \n\ + adds %1, %1, %3, lsl #16 \n\ + adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), \ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ + "r" ((USItype) (b)));} + + +#define __umulsidi3(u, v) \ + ({DIunion __w; \ + umul_ppmm (__w.s.high, __w.s.low, u, v); \ + __w.ll; }) + + +DItype +__muldi3 (DItype u, DItype v) +{ + DIunion w; + DIunion uu, vv; + + uu.ll = u, + vv.ll = v; + + w.ll = __umulsidi3 (uu.s.low, vv.s.low); + w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high + + (USItype) uu.s.high * (USItype) vv.s.low); + + return w.ll; +} + diff --git a/arch/arm26/lib/putuser.S b/arch/arm26/lib/putuser.S new file mode 100644 index 000000000000..87588cbe46ae --- /dev/null +++ b/arch/arm26/lib/putuser.S @@ -0,0 +1,109 @@ +/* + * linux/arch/arm26/lib/putuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make + * them more efficient, especially as they return an error + * value in addition to the "real" return value. + * + * __put_user_X + * + * Inputs: r0 contains the address + * r1, r2 contains the value + * Outputs: r0 is the error code + * lr corrupted + * + * No other registers must be altered. (see include/asm-arm/uaccess.h + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000 + * Note also that it is intended that __put_user_bad is not global. + */ +#include <asm/asm_offsets.h> +#include <asm/thread_info.h> +#include <asm/errno.h> + + .global __put_user_1 +__put_user_1: + bic r2, sp, #0x1f00 + bic r2, r2, #0x00ff + str lr, [sp, #-4]! + ldr r2, [r2, #TI_ADDR_LIMIT] + sub r2, r2, #1 + cmp r0, r2 + bge __put_user_bad +1: cmp r0, #0x02000000 + strlsbt r1, [r0] + strgeb r1, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __put_user_2 +__put_user_2: + bic r2, sp, #0x1f00 + bic r2, r2, #0x00ff + str lr, [sp, #-4]! + ldr r2, [r2, #TI_ADDR_LIMIT] + sub r2, r2, #2 + cmp r0, r2 + bge __put_user_bad +2: cmp r0, #0x02000000 + strlsbt r1, [r0], #1 + strgeb r1, [r0], #1 + mov r1, r1, lsr #8 +3: strlsbt r1, [r0] + strgeb r1, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __put_user_4 +__put_user_4: + bic r2, sp, #0x1f00 + bic r2, r2, #0x00ff + str lr, [sp, #-4]! + ldr r2, [r2, #TI_ADDR_LIMIT] + sub r2, r2, #4 + cmp r0, r2 +4: bge __put_user_bad + cmp r0, #0x02000000 + strlst r1, [r0] + strge r1, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + + .global __put_user_8 +__put_user_8: + bic ip, sp, #0x1f00 + bic ip, ip, #0x00ff + str lr, [sp, #-4]! + ldr ip, [ip, #TI_ADDR_LIMIT] + sub ip, ip, #8 + cmp r0, ip + bge __put_user_bad + cmp r0, #0x02000000 +5: strlst r1, [r0], #4 +6: strlst r2, [r0] + strge r1, [r0], #4 + strge r2, [r0] + mov r0, #0 + ldmfd sp!, {pc}^ + +__put_user_bad: + mov r0, #-EFAULT + mov pc, lr + +.section __ex_table, "a" + .long 1b, __put_user_bad + .long 2b, __put_user_bad + .long 3b, __put_user_bad + .long 4b, __put_user_bad + .long 5b, __put_user_bad + .long 6b, __put_user_bad +.previous diff --git a/arch/arm26/lib/setbit.S b/arch/arm26/lib/setbit.S new file mode 100644 index 000000000000..e180c1a1b2f1 --- /dev/null +++ b/arch/arm26/lib/setbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm26/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Function to set a bit + * Prototype: int set_bit(int bit, void *addr) + */ +ENTRY(_set_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_set_bit_le) + and r2, r0, #7 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip, r2 + ldrb r2, [r1, r0, lsr #3] + orr r2, r2, r3 + strb r2, [r1, r0, lsr #3] + restore_irqs ip + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/strchr.S b/arch/arm26/lib/strchr.S new file mode 100644 index 000000000000..ecfff21aa7c7 --- /dev/null +++ b/arch/arm26/lib/strchr.S @@ -0,0 +1,25 @@ +/* + * linux/arch/arm26/lib/strchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strchr) +1: ldrb r2, [r0], #1 + teq r2, r1 + teqne r2, #0 + bne 1b + teq r2, #0 + moveq r0, #0 + subne r0, r0, #1 + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/strrchr.S b/arch/arm26/lib/strrchr.S new file mode 100644 index 000000000000..db43b28e78dc --- /dev/null +++ b/arch/arm26/lib/strrchr.S @@ -0,0 +1,25 @@ +/* + * linux/arch/arm26/lib/strrchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strrchr) + mov r3, #0 +1: ldrb r2, [r0], #1 + teq r2, r1 + subeq r3, r0, #1 + teq r2, #0 + bne 1b + mov r0, r3 + RETINSTR(mov,pc,lr) diff --git a/arch/arm26/lib/testchangebit.S b/arch/arm26/lib/testchangebit.S new file mode 100644 index 000000000000..17049a2d93a4 --- /dev/null +++ b/arch/arm26/lib/testchangebit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm26/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_change_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_change_bit_le) + add r1, r1, r0, lsr #3 + and r3, r0, #7 + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + eor r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm26/lib/testclearbit.S b/arch/arm26/lib/testclearbit.S new file mode 100644 index 000000000000..2506bd743ab4 --- /dev/null +++ b/arch/arm26/lib/testclearbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm26/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_clear_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_clear_bit_le) + add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #7 @ Get bit offset + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + bic r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm26/lib/testsetbit.S b/arch/arm26/lib/testsetbit.S new file mode 100644 index 000000000000..f827de64b22d --- /dev/null +++ b/arch/arm26/lib/testsetbit.S @@ -0,0 +1,29 @@ +/* + * linux/arch/arm26/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +ENTRY(_test_and_set_bit_be) + eor r0, r0, #0x18 @ big endian byte ordering +ENTRY(_test_and_set_bit_le) + add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #7 @ Get bit offset + mov r0, #1 + save_and_disable_irqs ip, r2 + ldrb r2, [r1] + tst r2, r0, lsl r3 + orr r2, r2, r0, lsl r3 + strb r2, [r1] + restore_irqs ip + moveq r0, #0 + RETINSTR(mov,pc,lr) + + diff --git a/arch/arm26/lib/uaccess-kernel.S b/arch/arm26/lib/uaccess-kernel.S new file mode 100644 index 000000000000..3950a1f6bc99 --- /dev/null +++ b/arch/arm26/lib/uaccess-kernel.S @@ -0,0 +1,173 @@ +/* + * linux/arch/arm26/lib/uaccess-kernel.S + * + * Copyright (C) 1998 Russell King + * + * Note! Some code fragments found in here have a special calling + * convention - they are not APCS compliant! + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t) + + .globl uaccess_kernel +uaccess_kernel: + .word uaccess_kernel_put_byte + .word uaccess_kernel_get_byte + .word uaccess_kernel_put_half + .word uaccess_kernel_get_half + .word uaccess_kernel_put_word + .word uaccess_kernel_get_word + .word uaccess_kernel_put_dword + .word uaccess_kernel_copy + .word uaccess_kernel_copy + .word uaccess_kernel_clear + .word uaccess_kernel_strncpy + .word uaccess_kernel_strnlen + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_kernel_put_byte: + stmfd sp!, {lr} + strb r0, [r1] + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_kernel_put_half: + stmfd sp!, {lr} + strb r0, [r1] + mov r0, r0, lsr #8 + strb r0, [r1, #1] + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_kernel_put_word: + stmfd sp!, {lr} + str r0, [r1] + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_kernel_put_dword: + stmfd sp!, {lr} + str r0, [r1], #4 + str r0, [r1], #0 + ldmfd sp!, {pc}^ + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_kernel_get_byte: + stmfd sp!, {lr} + ldrb r0, [r0] + ldmfd sp!, {pc}^ + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_kernel_get_half: + stmfd sp!, {lr} + ldr r0, [r0] + mov r0, r0, lsl #16 + mov r0, r0, lsr #16 + ldmfd sp!, {pc}^ + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_kernel_get_word: + stmfd sp!, {lr} + ldr r0, [r0] + ldmfd sp!, {pc}^ + + +/* Prototype: int uaccess_kernel_copy(void *to, const char *from, size_t n) + * Purpose : copy a block to kernel memory from kernel memory + * Params : to - kernel memory + * : from - kernel memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ +uaccess_kernel_copy: + stmfd sp!, {lr} + bl memcpy + mov r0, #0 + ldmfd sp!, {pc}^ + +/* Prototype: int uaccess_kernel_clear(void *addr, size_t sz) + * Purpose : clear some kernel memory + * Params : addr - kernel memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +uaccess_kernel_clear: + stmfd sp!, {lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #1 + strb r2, [r0], #1 + strleb r2, [r0], #1 + strltb r2, [r0], #1 + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 + bmi 2f + str r2, [r0], #4 + str r2, [r0], #4 + b 1b +2: adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 + strpl r2, [r0], #4 + tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x + strneb r2, [r0], #1 + strneb r2, [r0], #1 + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 + strneb r2, [r0], #1 + mov r0, #0 + ldmfd sp!, {pc}^ + +/* Prototype: size_t uaccess_kernel_strncpy(char *dst, char *src, size_t len) + * Purpose : copy a string from kernel memory to kernel memory + * Params : dst - kernel memory destination + * : src - kernel memory source + * : len - maximum length of string + * Returns : number of characters copied + */ +uaccess_kernel_strncpy: + stmfd sp!, {lr} + mov ip, r2 +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r1], #1 + strb r3, [r0], #1 + teq r3, #0 + bne 1b +2: subs r0, ip, r2 + ldmfd sp!, {pc}^ + +/* Prototype: int uaccess_kernel_strlen(char *str, long n) + * Purpose : get length of a string in kernel memory + * Params : str - address of string in kernel memory + * Returns : length of string *including terminator*, + * or zero on exception, or n + 1 if too long + */ +uaccess_kernel_strnlen: + stmfd sp!, {lr} + mov r2, r0 +1: ldrb r1, [r0], #1 + teq r1, #0 + beq 2f + subs r1, r1, #1 + bne 1b + add r0, r0, #1 +2: sub r0, r0, r2 + ldmfd sp!, {pc}^ + diff --git a/arch/arm26/lib/uaccess-user.S b/arch/arm26/lib/uaccess-user.S new file mode 100644 index 000000000000..130b8f28610a --- /dev/null +++ b/arch/arm26/lib/uaccess-user.S @@ -0,0 +1,718 @@ +/* + * linux/arch/arm26/lib/uaccess-user.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Routines to block copy data to/from user memory + * These are highly optimised both for the 4k page size + * and for various alignments. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/page.h> + + .text + +//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t) + .globl uaccess_user +uaccess_user: + .word uaccess_user_put_byte + .word uaccess_user_get_byte + .word uaccess_user_put_half + .word uaccess_user_get_half + .word uaccess_user_put_word + .word uaccess_user_get_word + .word uaccess_user_put_dword + .word uaccess_user_copy_from_user + .word uaccess_user_copy_to_user + .word uaccess_user_clear_user + .word uaccess_user_strncpy_from_user + .word uaccess_user_strnlen_user + + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_user_put_byte: + stmfd sp!, {lr} +USER( strbt r0, [r1]) + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_user_put_half: + stmfd sp!, {lr} +USER( strbt r0, [r1], #1) + mov r0, r0, lsr #8 +USER( strbt r0, [r1]) + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_user_put_word: + stmfd sp!, {lr} +USER( strt r0, [r1]) + ldmfd sp!, {pc}^ + +@ In : r0 = x, r1 = addr, r2 = error +@ Out: r2 = error +uaccess_user_put_dword: + stmfd sp!, {lr} +USER( strt r0, [r1], #4) +USER( strt r0, [r1], #0) + ldmfd sp!, {pc}^ + +9001: mov r2, #-EFAULT + ldmfd sp!, {pc}^ + + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_user_get_byte: + stmfd sp!, {lr} +USER( ldrbt r0, [r0]) + ldmfd sp!, {pc}^ + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_user_get_half: + stmfd sp!, {lr} +USER( ldrt r0, [r0]) + mov r0, r0, lsl #16 + mov r0, r0, lsr #16 + ldmfd sp!, {pc}^ + +@ In : r0 = addr, r1 = error +@ Out: r0 = x, r1 = error +uaccess_user_get_word: + stmfd sp!, {lr} +USER( ldrt r0, [r0]) + ldmfd sp!, {pc}^ + +9001: mov r1, #-EFAULT + ldmfd sp!, {pc}^ + +/* Prototype: int uaccess_user_copy_to_user(void *to, const char *from, size_t n) + * Purpose : copy a block to user memory from kernel memory + * Params : to - user memory + * : from - kernel memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ + +.c2u_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( strgtbt r3, [r0], #1) @ May fault + sub r2, r2, ip + b .c2u_dest_aligned + +ENTRY(uaccess_user_copy_to_user) + stmfd sp!, {r2, r4 - r7, lr} + cmp r2, #4 + blt .c2u_not_enough + ands ip, r0, #3 + bne .c2u_dest_not_aligned +.c2u_dest_aligned: + + ands ip, r1, #3 + bne .c2u_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.c2u_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_0nowords + ldr r3, [r1], #4 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .c2u_0rem8lp + +.c2u_0cpy8lp: ldmia r1!, {r3 - r6} + stmia r0!, {r3 - r6} @ Shouldnt fault + ldmia r1!, {r3 - r6} + stmia r0!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #32 + bpl .c2u_0cpy8lp +.c2u_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} + stmgeia r0!, {r3 - r6} @ Shouldnt fault + tst ip, #8 + ldmneia r1!, {r3 - r4} + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + ldrne r3, [r1], #4 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_0fupi +.c2u_0nowords: teq ip, #0 + beq .c2u_finished +.c2u_nowords: cmp ip, #2 + ldrb r3, [r1], #1 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_not_enough: + movs ip, r2 + bne .c2u_nowords +.c2u_finished: mov r0, #0 + LOADREGS(fd,sp!,{r2, r4 - r7, pc}) + +.c2u_src_not_aligned: + bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt .c2u_3fupi + beq .c2u_2fupi +.c2u_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_1nowords + mov r3, r7, pull #8 + ldr r7, [r1], #4 + orr r3, r3, r7, push #24 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_1rem8lp + +.c2u_1cpy8lp: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #16 + bpl .c2u_1cpy8lp +.c2u_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #8 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #24 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_1fupi +.c2u_1nowords: mov r3, r7, lsr #byte(1) + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + movge r3, r7, lsr #byte(2) +USER( strgebt r3, [r0], #1) @ May fault + movgt r3, r7, lsr #byte(3) +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_2nowords + mov r3, r7, pull #16 + ldr r7, [r1], #4 + orr r3, r3, r7, push #16 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_2rem8lp + +.c2u_2cpy8lp: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #16 + bpl .c2u_2cpy8lp +.c2u_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #16 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #16 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_2fupi +.c2u_2nowords: mov r3, r7, lsr #byte(2) + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + movge r3, r7, lsr #byte(3) +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + +.c2u_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .c2u_3nowords + mov r3, r7, pull #24 + ldr r7, [r1], #4 + orr r3, r3, r7, push #8 +USER( strt r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .c2u_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .c2u_3rem8lp + +.c2u_3cpy8lp: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #16 + bpl .c2u_3cpy8lp +.c2u_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #24 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #8 + strnet r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .c2u_3fupi +.c2u_3nowords: mov r3, r7, lsr #byte(3) + teq ip, #0 + beq .c2u_finished + cmp ip, #2 +USER( strbt r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( strgebt r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( strgtbt r3, [r0], #1) @ May fault + b .c2u_finished + + .section .fixup,"ax" + .align 0 +9001: LOADREGS(fd,sp!, {r0, r4 - r7, pc}) + .previous + +/* Prototype: unsigned long uaccess_user_copy_from_user(void *to,const void *from,unsigned long n); + * Purpose : copy a block from user memory to kernel memory + * Params : to - kernel memory + * : from - user memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ +.cfu_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 +USER( ldrbt r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + sub r2, r2, ip + b .cfu_dest_aligned + +ENTRY(uaccess_user_copy_from_user) + stmfd sp!, {r0, r2, r4 - r7, lr} + cmp r2, #4 + blt .cfu_not_enough + ands ip, r0, #3 + bne .cfu_dest_not_aligned +.cfu_dest_aligned: + ands ip, r1, #3 + bne .cfu_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.cfu_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_0nowords +USER( ldrt r3, [r1], #4) + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .cfu_0rem8lp + +.cfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault + stmia r0!, {r3 - r6} + ldmia r1!, {r3 - r6} @ Shouldnt fault + stmia r0!, {r3 - r6} + subs ip, ip, #32 + bpl .cfu_0cpy8lp +.cfu_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} @ Shouldnt fault + stmgeia r0!, {r3 - r6} + tst ip, #8 + ldmneia r1!, {r3 - r4} @ Shouldnt fault + stmneia r0!, {r3 - r4} + tst ip, #4 + ldrnet r3, [r1], #4 @ Shouldnt fault + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_0fupi +.cfu_0nowords: teq ip, #0 + beq .cfu_finished +.cfu_nowords: cmp ip, #2 +USER( ldrbt r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_not_enough: + movs ip, r2 + bne .cfu_nowords +.cfu_finished: mov r0, #0 + add sp, sp, #8 + LOADREGS(fd,sp!,{r4 - r7, pc}) + +.cfu_src_not_aligned: + bic r1, r1, #3 +USER( ldrt r7, [r1], #4) @ May fault + cmp ip, #2 + bgt .cfu_3fupi + beq .cfu_2fupi +.cfu_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_1nowords + mov r3, r7, pull #8 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #24 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_1rem8lp + +.cfu_1cpy8lp: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} @ Shouldnt fault + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} + subs ip, ip, #16 + bpl .cfu_1cpy8lp +.cfu_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #8 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #24 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_1fupi +.cfu_1nowords: mov r3, r7, lsr #byte(1) + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, lsr #byte(2) + strgeb r3, [r0], #1 + movgt r3, r7, lsr #byte(3) + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_2nowords + mov r3, r7, pull #16 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #16 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_2rem8lp + +.cfu_2cpy8lp: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} @ Shouldnt fault + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} + subs ip, ip, #16 + bpl .cfu_2cpy8lp +.cfu_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #16 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #16 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_2fupi +.cfu_2nowords: mov r3, r7, lsr #byte(2) + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, lsr #byte(3) + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #0) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + +.cfu_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .cfu_3nowords + mov r3, r7, pull #24 +USER( ldrt r7, [r1], #4) @ May fault + orr r3, r3, r7, push #8 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .cfu_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .cfu_3rem8lp + +.cfu_3cpy8lp: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} @ Shouldnt fault + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} + subs ip, ip, #16 + bpl .cfu_3cpy8lp +.cfu_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #24 +USER( ldrnet r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #8 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .cfu_3fupi +.cfu_3nowords: mov r3, r7, lsr #byte(3) + teq ip, #0 + beq .cfu_finished + cmp ip, #2 + strb r3, [r0], #1 +USER( ldrgebt r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( ldrgtbt r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .cfu_finished + + .section .fixup,"ax" + .align 0 + /* + * We took an exception. r0 contains a pointer to + * the byte not copied. + */ +9001: ldr r2, [sp], #4 @ void *to + sub r2, r0, r2 @ bytes copied + ldr r1, [sp], #4 @ unsigned long count + subs r4, r1, r2 @ bytes left to copy + movne r1, r4 + blne __memzero + mov r0, r4 + LOADREGS(fd,sp!, {r4 - r7, pc}) + .previous + +/* Prototype: int uaccess_user_clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(uaccess_user_clear_user) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 +USER( strbt r2, [r0], #1) +USER( strlebt r2, [r0], #1) +USER( strltbt r2, [r0], #1) + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 +USER( strplt r2, [r0], #4) +USER( strplt r2, [r0], #4) + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 +USER( strplt r2, [r0], #4) +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x +USER( strnebt r2, [r0], #1) +USER( strnebt r2, [r0], #1) + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 +USER( strnebt r2, [r0], #1) + mov r0, #0 + LOADREGS(fd,sp!, {r1, pc}) + + .section .fixup,"ax" + .align 0 +9001: LOADREGS(fd,sp!, {r0, pc}) + .previous + +/* + * Copy a string from user space to kernel space. + * r0 = dst, r1 = src, r2 = byte length + * returns the number of characters copied (strlen of copied string), + * -EFAULT on exception, or "len" if we fill the whole buffer + */ +ENTRY(uaccess_user_strncpy_from_user) + save_lr + mov ip, r1 +1: subs r2, r2, #1 +USER( ldrplbt r3, [r1], #1) + bmi 2f + strb r3, [r0], #1 + teq r3, #0 + bne 1b + sub r1, r1, #1 @ take NUL character out of count +2: sub r0, r1, ip + restore_pc + + .section .fixup,"ax" + .align 0 +9001: mov r3, #0 + strb r3, [r0, #0] @ null terminate + mov r0, #-EFAULT + restore_pc + .previous + +/* Prototype: unsigned long uaccess_user_strnlen_user(const char *str, long n) + * Purpose : get length of a string in user memory + * Params : str - address of string in user memory + * Returns : length of string *including terminator* + * or zero on exception, or n + 1 if too long + */ +ENTRY(uaccess_user_strnlen_user) + save_lr + mov r2, r0 +1: +USER( ldrbt r3, [r0], #1) + teq r3, #0 + beq 2f + subs r1, r1, #1 + bne 1b + add r0, r0, #1 +2: sub r0, r0, r2 + restore_pc + + .section .fixup,"ax" + .align 0 +9001: mov r0, #0 + restore_pc + .previous + diff --git a/arch/arm26/lib/ucmpdi2.c b/arch/arm26/lib/ucmpdi2.c new file mode 100644 index 000000000000..6c6ae63efa02 --- /dev/null +++ b/arch/arm26/lib/ucmpdi2.c @@ -0,0 +1,51 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" + +word_type +__ucmpdi2 (DItype a, DItype b) +{ + DIunion au, bu; + + au.ll = a, bu.ll = b; + + if ((USItype) au.s.high < (USItype) bu.s.high) + return 0; + else if ((USItype) au.s.high > (USItype) bu.s.high) + return 2; + if ((USItype) au.s.low < (USItype) bu.s.low) + return 0; + else if ((USItype) au.s.low > (USItype) bu.s.low) + return 2; + return 1; +} + diff --git a/arch/arm26/lib/udivdi3.c b/arch/arm26/lib/udivdi3.c new file mode 100644 index 000000000000..d25195f673f4 --- /dev/null +++ b/arch/arm26/lib/udivdi3.c @@ -0,0 +1,242 @@ +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + */ +/* support functions required by the kernel. based on code from gcc-2.95.3 */ +/* I Molton 29/07/01 */ + +#include "gcclib.h" +#include "longlong.h" + +static const UQItype __clz_tab[] = +{ + 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +}; + +UDItype +__udivmoddi4 (UDItype n, UDItype d, UDItype *rp) +{ + DIunion ww; + DIunion nn, dd; + DIunion rr; + USItype d0, d1, n0, n1, n2; + USItype q0, q1; + USItype b, bm; + + nn.ll = n; + dd.ll = d; + + d0 = dd.s.low; + d1 = dd.s.high; + n0 = nn.s.low; + n1 = nn.s.high; + + if (d1 == 0) + { + if (d0 > n1) + { + /* 0q = nn / 0D */ + + count_leading_zeros (bm, d0); + + if (bm != 0) + { + /* Normalize, i.e. make the most significant bit of the + denominator set. */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + count_leading_zeros (bm, d0); + + if (bm == 0) + { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + conclude (the most significant bit of n1 is set) /\ (the + leading quotient digit q1 = 1). + + This special case is necessary, not an optimization. + (Shifts counts of SI_TYPE_SIZE are undefined.) */ + + n1 -= d0; + q1 = 1; + } + else + { + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + if (rp != 0) + { + rr.s.low = n0 >> bm; + rr.s.high = 0; + *rp = rr.ll; + } + } + else + { + if (d1 > n1) + { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + /* 0q = NN / dd */ + + count_leading_zeros (bm, d1); + if (bm == 0) + { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + conclude (the most significant bit of n1 is set) /\ (the + quotient digit q0 = 0 or 1). + + This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) + { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } + else + q0 = 0; + + q1 = 0; + + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + USItype m1, m0; + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) + { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. */ + if (rp != 0) + { + sub_ddmmss (n1, n0, n1, n0, m1, m0); + rr.s.low = (n1 << b) | (n0 >> bm); + rr.s.high = n1 >> bm; + *rp = rr.ll; + } + } + } + } + + ww.s.low = q0; + ww.s.high = q1; + return ww.ll; +} + +UDItype +__udivdi3 (UDItype n, UDItype d) +{ + return __udivmoddi4 (n, d, (UDItype *) 0); +} + +UDItype +__umoddi3 (UDItype u, UDItype v) +{ + UDItype w; + + (void) __udivmoddi4 (u ,v, &w); + + return w; +} + diff --git a/arch/arm26/machine/Makefile b/arch/arm26/machine/Makefile new file mode 100644 index 000000000000..86ea97cc07fc --- /dev/null +++ b/arch/arm26/machine/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the linux kernel. +# + +# Object file lists. + +obj-y := dma.o irq.o latches.o + diff --git a/arch/arm26/machine/dma.c b/arch/arm26/machine/dma.c new file mode 100644 index 000000000000..cbc7c61d5b32 --- /dev/null +++ b/arch/arm26/machine/dma.c @@ -0,0 +1,215 @@ +/* + * linux/arch/arm26/kernel/dma.c + * + * Copyright (C) 1998-1999 Dave Gilbert / Russell King + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * DMA functions specific to Archimedes and A5000 architecture + */ +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/dma.h> +#include <asm/fiq.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/hardware.h> +#include <asm/mach-types.h> + +#define DPRINTK(x...) printk(KERN_DEBUG x) + +#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE) + +extern unsigned char fdc1772_dma_read, fdc1772_dma_read_end; +extern unsigned char fdc1772_dma_write, fdc1772_dma_write_end; +extern void fdc1772_setupdma(unsigned int count,unsigned int addr); + +static void arc_floppy_data_enable_dma(dmach_t channel, dma_t *dma) +{ + DPRINTK("arc_floppy_data_enable_dma\n"); + + if (dma->using_sg) + BUG(); + + switch (dma->dma_mode) { + case DMA_MODE_READ: { /* read */ + unsigned long flags; + DPRINTK("enable_dma fdc1772 data read\n"); + local_save_flags_cli(flags); + clf(); + + memcpy ((void *)0x1c, (void *)&fdc1772_dma_read, + &fdc1772_dma_read_end - &fdc1772_dma_read); + fdc1772_setupdma(dma->buf.length, dma->buf.__address); /* Sets data pointer up */ + enable_fiq(FIQ_FLOPPYDATA); + local_irq_restore(flags); + } + break; + + case DMA_MODE_WRITE: { /* write */ + unsigned long flags; + DPRINTK("enable_dma fdc1772 data write\n"); + local_save_flags_cli(flags); + clf(); + memcpy ((void *)0x1c, (void *)&fdc1772_dma_write, + &fdc1772_dma_write_end - &fdc1772_dma_write); + fdc1772_setupdma(dma->buf.length, dma->buf.__address); /* Sets data pointer up */ + enable_fiq(FIQ_FLOPPYDATA); + + local_irq_restore(flags); + } + break; + default: + printk ("enable_dma: dma%d not initialised\n", channel); + } +} + +static int arc_floppy_data_get_dma_residue(dmach_t channel, dma_t *dma) +{ + extern unsigned int fdc1772_bytestogo; + + /* 10/1/1999 DAG - I presume its the number of bytes left? */ + return fdc1772_bytestogo; +} + +static void arc_floppy_cmdend_enable_dma(dmach_t channel, dma_t *dma) +{ + /* Need to build a branch at the FIQ address */ + extern void fdc1772_comendhandler(void); + unsigned long flags; + + DPRINTK("arc_floppy_cmdend_enable_dma\n"); + /*printk("enable_dma fdc1772 command end FIQ\n");*/ + save_flags(flags); + clf(); + + /* B fdc1772_comendhandler */ + *((unsigned int *)0x1c)=0xea000000 | + (((unsigned int)fdc1772_comendhandler-(0x1c+8))/4); + + local_irq_restore(flags); +} + +static int arc_floppy_cmdend_get_dma_residue(dmach_t channel, dma_t *dma) +{ + /* 10/1/1999 DAG - Presume whether there is an outstanding command? */ + extern unsigned int fdc1772_fdc_int_done; + + /* Explicit! If the int done is 0 then 1 int to go */ + return (fdc1772_fdc_int_done==0)?1:0; +} + +static void arc_disable_dma(dmach_t channel, dma_t *dma) +{ + disable_fiq(dma->dma_irq); +} + +static struct dma_ops arc_floppy_data_dma_ops = { + .type = "FIQDMA", + .enable = arc_floppy_data_enable_dma, + .disable = arc_disable_dma, + .residue = arc_floppy_data_get_dma_residue, +}; + +static struct dma_ops arc_floppy_cmdend_dma_ops = { + .type = "FIQCMD", + .enable = arc_floppy_cmdend_enable_dma, + .disable = arc_disable_dma, + .residue = arc_floppy_cmdend_get_dma_residue, +}; +#endif + +#ifdef CONFIG_ARCH_A5K +static struct fiq_handler fh = { + .name = "floppydata" +}; + +static int a5k_floppy_get_dma_residue(dmach_t channel, dma_t *dma) +{ + struct pt_regs regs; + get_fiq_regs(®s); + return regs.ARM_r9; +} + +static void a5k_floppy_enable_dma(dmach_t channel, dma_t *dma) +{ + struct pt_regs regs; + void *fiqhandler_start; + unsigned int fiqhandler_length; + extern void floppy_fiqsetup(unsigned long len, unsigned long addr, + unsigned long port); + + if (dma->using_sg) + BUG(); + + if (dma->dma_mode == DMA_MODE_READ) { + extern unsigned char floppy_fiqin_start, floppy_fiqin_end; + fiqhandler_start = &floppy_fiqin_start; + fiqhandler_length = &floppy_fiqin_end - &floppy_fiqin_start; + } else { + extern unsigned char floppy_fiqout_start, floppy_fiqout_end; + fiqhandler_start = &floppy_fiqout_start; + fiqhandler_length = &floppy_fiqout_end - &floppy_fiqout_start; + } + if (claim_fiq(&fh)) { + printk("floppydma: couldn't claim FIQ.\n"); + return; + } + memcpy((void *)0x1c, fiqhandler_start, fiqhandler_length); + regs.ARM_r9 = dma->buf.length; + regs.ARM_r10 = (unsigned long)dma->buf.__address; + regs.ARM_fp = FLOPPYDMA_BASE; + set_fiq_regs(®s); + enable_fiq(dma->dma_irq); +} + +static void a5k_floppy_disable_dma(dmach_t channel, dma_t *dma) +{ + disable_fiq(dma->dma_irq); + release_fiq(&fh); +} + +static struct dma_ops a5k_floppy_dma_ops = { + .type = "FIQDMA", + .enable = a5k_floppy_enable_dma, + .disable = a5k_floppy_disable_dma, + .residue = a5k_floppy_get_dma_residue, +}; +#endif + +/* + * This is virtual DMA - we don't need anything here + */ +static void sound_enable_disable_dma(dmach_t channel, dma_t *dma) +{ +} + +static struct dma_ops sound_dma_ops = { + .type = "VIRTUAL", + .enable = sound_enable_disable_dma, + .disable = sound_enable_disable_dma, +}; + +void __init arch_dma_init(dma_t *dma) +{ +#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE) + if (machine_is_archimedes()) { + dma[DMA_VIRTUAL_FLOPPY0].dma_irq = FIQ_FLOPPYDATA; + dma[DMA_VIRTUAL_FLOPPY0].d_ops = &arc_floppy_data_dma_ops; + dma[DMA_VIRTUAL_FLOPPY1].dma_irq = 1; + dma[DMA_VIRTUAL_FLOPPY1].d_ops = &arc_floppy_cmdend_dma_ops; + } +#endif +#ifdef CONFIG_ARCH_A5K + if (machine_is_a5k()) { + dma[DMA_VIRTUAL_FLOPPY0].dma_irq = FIQ_FLOPPYDATA; + dma[DMA_VIRTUAL_FLOPPY0].d_ops = &a5k_floppy_dma_ops; + } +#endif + dma[DMA_VIRTUAL_SOUND].d_ops = &sound_dma_ops; +} diff --git a/arch/arm26/machine/irq.c b/arch/arm26/machine/irq.c new file mode 100644 index 000000000000..4361863f7ed2 --- /dev/null +++ b/arch/arm26/machine/irq.c @@ -0,0 +1,165 @@ +/* + * linux/arch/arm26/mach-arc/irq.c + * + * Copyright (C) 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Changelog: + * 24-09-1996 RMK Created + * 10-10-1996 RMK Brought up to date with arch-sa110eval + * 22-10-1996 RMK Changed interrupt numbers & uses new inb/outb macros + * 11-01-1998 RMK Added mask_and_ack_irq + * 22-08-1998 RMK Restructured IRQ routines + * 08-09-2002 IM Brought up to date for 2.5 + * 01-06-2003 JMA Removed arc_fiq_chip + */ +#include <linux/config.h> +#include <linux/init.h> + +#include <asm/irq.h> +#include <asm/irqchip.h> +#include <asm/ioc.h> +#include <asm/io.h> +#include <asm/system.h> + +extern void init_FIQ(void); + +#define a_clf() clf() +#define a_stf() stf() + +static void arc_ack_irq_a(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << irq; + a_clf(); + val = ioc_readb(IOC_IRQMASKA); + ioc_writeb(val & ~mask, IOC_IRQMASKA); + ioc_writeb(mask, IOC_IRQCLRA); + a_stf(); +} + +static void arc_mask_irq_a(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << irq; + a_clf(); + val = ioc_readb(IOC_IRQMASKA); + ioc_writeb(val & ~mask, IOC_IRQMASKA); + a_stf(); +} + +static void arc_unmask_irq_a(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << irq; + a_clf(); + val = ioc_readb(IOC_IRQMASKA); + ioc_writeb(val | mask, IOC_IRQMASKA); + a_stf(); +} + +static struct irqchip arc_a_chip = { + .ack = arc_ack_irq_a, + .mask = arc_mask_irq_a, + .unmask = arc_unmask_irq_a, +}; + +static void arc_mask_irq_b(unsigned int irq) +{ + unsigned int val, mask; + mask = 1 << (irq & 7); + val = ioc_readb(IOC_IRQMASKB); + ioc_writeb(val & ~mask, IOC_IRQMASKB); +} + +static void arc_unmask_irq_b(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << (irq & 7); + val = ioc_readb(IOC_IRQMASKB); + ioc_writeb(val | mask, IOC_IRQMASKB); +} + +static struct irqchip arc_b_chip = { + .ack = arc_mask_irq_b, + .mask = arc_mask_irq_b, + .unmask = arc_unmask_irq_b, +}; + +/* FIXME - JMA none of these functions are used in arm26 currently +static void arc_mask_irq_fiq(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << (irq & 7); + val = ioc_readb(IOC_FIQMASK); + ioc_writeb(val & ~mask, IOC_FIQMASK); +} + +static void arc_unmask_irq_fiq(unsigned int irq) +{ + unsigned int val, mask; + + mask = 1 << (irq & 7); + val = ioc_readb(IOC_FIQMASK); + ioc_writeb(val | mask, IOC_FIQMASK); +} + +static struct irqchip arc_fiq_chip = { + .ack = arc_mask_irq_fiq, + .mask = arc_mask_irq_fiq, + .unmask = arc_unmask_irq_fiq, +}; +*/ + +void __init arc_init_irq(void) +{ + unsigned int irq, flags; + + /* Disable all IOC interrupt sources */ + ioc_writeb(0, IOC_IRQMASKA); + ioc_writeb(0, IOC_IRQMASKB); + ioc_writeb(0, IOC_FIQMASK); + + for (irq = 0; irq < NR_IRQS; irq++) { + flags = IRQF_VALID; + + if (irq <= 6 || (irq >= 9 && irq <= 15)) + flags |= IRQF_PROBE; + + if (irq == IRQ_KEYBOARDTX) + flags |= IRQF_NOAUTOEN; + + switch (irq) { + case 0 ... 7: + set_irq_chip(irq, &arc_a_chip); + set_irq_handler(irq, do_level_IRQ); + set_irq_flags(irq, flags); + break; + + case 8 ... 15: + set_irq_chip(irq, &arc_b_chip); + set_irq_handler(irq, do_level_IRQ); + set_irq_flags(irq, flags); + +/* case 64 ... 72: + set_irq_chip(irq, &arc_fiq_chip); + set_irq_flags(irq, flags); + break; +*/ + + } + } + + irq_desc[IRQ_KEYBOARDTX].noautoenable = 1; + + init_FIQ(); +} + diff --git a/arch/arm26/machine/latches.c b/arch/arm26/machine/latches.c new file mode 100644 index 000000000000..94f05d2a3b2b --- /dev/null +++ b/arch/arm26/machine/latches.c @@ -0,0 +1,72 @@ +/* + * linux/arch/arm26/kernel/latches.c + * + * Copyright (C) David Alan Gilbert 1995/1996,2000 + * Copyright (C) Ian Molton 2003 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Support for the latches on the old Archimedes which control the floppy, + * hard disc and printer + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/hardware.h> +#include <asm/mach-types.h> +#include <asm/oldlatches.h> + +static unsigned char latch_a_copy; +static unsigned char latch_b_copy; + +/* newval=(oldval & ~mask)|newdata */ +void oldlatch_aupdate(unsigned char mask,unsigned char newdata) +{ + unsigned long flags; + + BUG_ON(!machine_is_archimedes()); + + local_irq_save(flags); //FIXME: was local_save_flags + latch_a_copy = (latch_a_copy & ~mask) | newdata; + __raw_writeb(latch_a_copy, LATCHA_BASE); + local_irq_restore(flags); + + printk("Latch: A = 0x%02x\n", latch_a_copy); +} + + +/* newval=(oldval & ~mask)|newdata */ +void oldlatch_bupdate(unsigned char mask,unsigned char newdata) +{ + unsigned long flags; + + BUG_ON(!machine_is_archimedes()); + + + local_irq_save(flags);//FIXME: was local_save_flags + latch_b_copy = (latch_b_copy & ~mask) | newdata; + __raw_writeb(latch_b_copy, LATCHB_BASE); + local_irq_restore(flags); + + printk("Latch: B = 0x%02x\n", latch_b_copy); +} + +static int __init oldlatch_init(void) +{ + if (machine_is_archimedes()) { + oldlatch_aupdate(0xff, 0xff); + /* Thats no FDC reset...*/ + oldlatch_bupdate(0xff, LATCHB_FDCRESET); + } + return 0; +} + +arch_initcall(oldlatch_init); + +EXPORT_SYMBOL(oldlatch_aupdate); +EXPORT_SYMBOL(oldlatch_bupdate); diff --git a/arch/arm26/mm/Makefile b/arch/arm26/mm/Makefile new file mode 100644 index 000000000000..a8fb166d5c6d --- /dev/null +++ b/arch/arm26/mm/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the linux arm26-specific parts of the memory manager. +# + +obj-y := init.o extable.o proc-funcs.o memc.o fault.o \ + small_page.o diff --git a/arch/arm26/mm/extable.c b/arch/arm26/mm/extable.c new file mode 100644 index 000000000000..2d9f5b5a78d6 --- /dev/null +++ b/arch/arm26/mm/extable.c @@ -0,0 +1,25 @@ +/* + * linux/arch/arm26/mm/extable.c + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + + /* + * The kernel runs in SVC mode - make sure we keep running in SVC mode + * by frobbing the PSR appropriately (PSR and PC are in the same reg. + * on ARM26) + */ + if (fixup) + regs->ARM_pc = fixup->fixup | PSR_I_BIT | MODE_SVC26; + + return fixup != NULL; +} + diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c new file mode 100644 index 000000000000..dacca8bb7744 --- /dev/null +++ b/arch/arm26/mm/fault.c @@ -0,0 +1,318 @@ +/* + * linux/arch/arm26/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> //FIXME this header may be bogusly included + +#include "fault.h" + +#define FAULT_CODE_LDRSTRPOST 0x80 +#define FAULT_CODE_LDRSTRPRE 0x40 +#define FAULT_CODE_LDRSTRREG 0x20 +#define FAULT_CODE_LDMSTM 0x10 +#define FAULT_CODE_LDCSTC 0x08 +#define FAULT_CODE_PREFETCH 0x04 +#define FAULT_CODE_WRITE 0x02 +#define FAULT_CODE_FORCECOW 0x01 + +#define DO_COW(m) ((m) & (FAULT_CODE_WRITE|FAULT_CODE_FORCECOW)) +#define READ_FAULT(m) (!((m) & FAULT_CODE_WRITE)) +#define DEBUG +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (!mm) + mm = &init_mm; + + printk(KERN_ALERT "pgd = %p\n", mm->pgd); + pgd = pgd_offset(mm, addr); + printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); + + do { + pmd_t *pmd; + pte_t *pte; + + pmd = pmd_offset(pgd, addr); + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + printk("(bad)"); + break; + } + + /* We must not map this if we have highmem enabled */ + /* FIXME */ + pte = pte_offset_map(pmd, addr); + printk(", *pte=%08lx", pte_val(*pte)); + pte_unmap(pte); + } while(0); + + printk("\n"); +} + +/* + * Oops. The kernel tried to access some page that wasn't present. + */ +static void +__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + /* + * Are we prepared to handle this kernel fault? + */ + if (fixup_exception(regs)) + return; + + /* + * No handler, we'll have to terminate things with extreme prejudice. + */ + bust_spinlocks(1); + printk(KERN_ALERT + "Unable to handle kernel %s at virtual address %08lx\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + + show_pte(mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + +/* + * Something tried to access memory that isn't in our memory map.. + * User mode accesses just cause a SIGSEGV + */ +static void +__do_user_fault(struct task_struct *tsk, unsigned long addr, + unsigned int fsr, int code, struct pt_regs *regs) +{ + struct siginfo si; + +#ifdef CONFIG_DEBUG_USER + printk("%s: unhandled page fault at 0x%08lx, code 0x%03x\n", + tsk->comm, addr, fsr); + show_pte(tsk->mm, addr); + show_regs(regs); + //dump_backtrace(regs, tsk); // FIXME ARM32 dropped this - why? + while(1); //FIXME - hack to stop debug going nutso +#endif + + tsk->thread.address = addr; + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void *)addr; + force_sig_info(SIGSEGV, &si, tsk); +} + +static int +__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct task_struct *tsk) +{ + struct vm_area_struct *vma; + int fault, mask; + + vma = find_vma(mm, addr); + fault = -2; /* bad map area */ + if (!vma) + goto out; + if (vma->vm_start > addr) + goto check_stack; + + /* + * Ok, we have a good vm_area for this + * memory access, so we can handle it. + */ +good_area: + if (READ_FAULT(fsr)) /* read? */ + mask = VM_READ|VM_EXEC; + else + mask = VM_WRITE; + + fault = -1; /* bad access type */ + if (!(vma->vm_flags & mask)) + goto out; + + /* + * If for any reason at all we couldn't handle + * the fault, make sure we exit gracefully rather + * than endlessly redo the fault. + */ +survive: + fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, DO_COW(fsr)); + + /* + * Handle the "normal" cases first - successful and sigbus + */ + switch (fault) { + case 2: + tsk->maj_flt++; + return fault; + case 1: + tsk->min_flt++; + case 0: + return fault; + } + + fault = -3; /* out of memory */ + if (tsk->pid != 1) + goto out; + + /* + * If we are out of memory for pid1, + * sleep for a while and retry + */ + yield(); + goto survive; + +check_stack: + if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + goto good_area; +out: + return fault; +} + +int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + int fault; + + tsk = current; + mm = tsk->mm; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + fault = __do_page_fault(mm, addr, fsr, tsk); + up_read(&mm->mmap_sem); + + /* + * Handle the "normal" case first + */ + if (fault > 0) + return 0; + + /* + * We had some memory, but were unable to + * successfully fix up this page fault. + */ + if (fault == 0){ + goto do_sigbus; + } + + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + * FIXME - is this test right? + */ + if (!user_mode(regs)){ + goto no_context; + } + + if (fault == -3) { + /* + * We ran out of memory, or some other thing happened to + * us that made us unable to handle the page fault gracefully. + */ + printk("VM: killing process %s\n", tsk->comm); + do_exit(SIGKILL); + } + else{ + __do_user_fault(tsk, addr, fsr, fault == -1 ? SEGV_ACCERR : SEGV_MAPERR, regs); + } + + return 0; + + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +do_sigbus: + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.address = addr; //FIXME - need other bits setting? + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + force_sig(SIGBUS, tsk); +#ifdef CONFIG_DEBUG_USER + printk(KERN_DEBUG "%s: sigbus at 0x%08lx, pc=0x%08lx\n", + current->comm, addr, instruction_pointer(regs)); +#endif + + /* Kernel mode? Handle exceptions or die */ + if (user_mode(regs)) + return 0; + +no_context: + __do_kernel_fault(mm, addr, fsr, regs); + return 0; +} + +/* + * Handle a data abort. Note that we have to handle a range of addresses + * on ARM2/3 for ldm. If both pages are zero-mapped, then we have to force + * a copy-on-write. However, on the second page, we always force COW. + */ +asmlinkage void +do_DataAbort(unsigned long min_addr, unsigned long max_addr, int mode, struct pt_regs *regs) +{ + do_page_fault(min_addr, mode, regs); + + if ((min_addr ^ max_addr) >> PAGE_SHIFT){ + do_page_fault(max_addr, mode | FAULT_CODE_FORCECOW, regs); + } +} + +asmlinkage int +do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) +{ +#if 0 + if (the memc mapping for this page exists) { + printk ("Page in, but got abort (undefined instruction?)\n"); + return 0; + } +#endif + do_page_fault(addr, FAULT_CODE_PREFETCH, regs); + return 1; +} + diff --git a/arch/arm26/mm/fault.h b/arch/arm26/mm/fault.h new file mode 100644 index 000000000000..4442d00d86ac --- /dev/null +++ b/arch/arm26/mm/fault.h @@ -0,0 +1,5 @@ +void show_pte(struct mm_struct *mm, unsigned long addr); + +int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs); + +unsigned long search_extable(unsigned long addr); //FIXME - is it right? diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c new file mode 100644 index 000000000000..1f09a9d0fb83 --- /dev/null +++ b/arch/arm26/mm/init.c @@ -0,0 +1,412 @@ +/* + * linux/arch/arm26/mm/init.c + * + * Copyright (C) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/bootmem.h> +#include <linux/blkdev.h> + +#include <asm/segment.h> +#include <asm/mach-types.h> +#include <asm/dma.h> +#include <asm/hardware.h> +#include <asm/setup.h> +#include <asm/tlb.h> + +#include <asm/map.h> + + +#define TABLE_SIZE PTRS_PER_PTE * sizeof(pte_t)) + +struct mmu_gather mmu_gathers[NR_CPUS]; + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern char _stext, _text, _etext, _end, __init_begin, __init_end; +#ifdef CONFIG_XIP_KERNEL +extern char _endtext, _sdata; +#endif +extern unsigned long phys_initrd_start; +extern unsigned long phys_initrd_size; + +/* + * The sole use of this is to pass memory configuration + * data from paging_init to mem_init. + */ +static struct meminfo meminfo __initdata = { 0, }; + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; + +void show_mem(void) +{ + int free = 0, total = 0, reserved = 0; + int shared = 0, cached = 0, slab = 0; + struct page *page, *end; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + + + page = NODE_MEM_MAP(0); + end = page + NODE_DATA(0)->node_spanned_pages; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + + printk("%d pages of RAM\n", total); + printk("%d free pages\n", free); + printk("%d reserved pages\n", reserved); + printk("%d slab pages\n", slab); + printk("%d pages shared\n", shared); + printk("%d pages swap cached\n", cached); +} + +struct node_info { + unsigned int start; + unsigned int end; + int bootmap_pages; +}; + +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT) +#define PFN_SIZE(x) ((x) >> PAGE_SHIFT) +#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \ + (((unsigned long)(s)) & PAGE_MASK)) + +/* + * FIXME: We really want to avoid allocating the bootmap bitmap + * over the top of the initrd. Hopefully, this is located towards + * the start of a bank, so if we allocate the bootmap bitmap at + * the end, we won't clash. + */ +static unsigned int __init +find_bootmap_pfn(struct meminfo *mi, unsigned int bootmap_pages) +{ + unsigned int start_pfn, bootmap_pfn; + unsigned int start, end; + + start_pfn = PFN_UP((unsigned long)&_end); + bootmap_pfn = 0; + + /* ARM26 machines only have one node */ + if (mi->bank->node != 0) + BUG(); + + start = PFN_UP(mi->bank->start); + end = PFN_DOWN(mi->bank->size + mi->bank->start); + + if (start < start_pfn) + start = start_pfn; + + if (end <= start) + BUG(); + + if (end - start >= bootmap_pages) + bootmap_pfn = start; + else + BUG(); + + return bootmap_pfn; +} + +/* + * Scan the memory info structure and pull out: + * - the end of memory + * - the number of nodes + * - the pfn range of each node + * - the number of bootmem bitmap pages + */ +static void __init +find_memend_and_nodes(struct meminfo *mi, struct node_info *np) +{ + unsigned int memend_pfn = 0; + + nodes_clear(node_online_map); + node_set_online(0); + + np->bootmap_pages = 0; + + if (mi->bank->size == 0) { + BUG(); + } + + /* + * Get the start and end pfns for this bank + */ + np->start = PFN_UP(mi->bank->start); + np->end = PFN_DOWN(mi->bank->start + mi->bank->size); + + if (memend_pfn < np->end) + memend_pfn = np->end; + + /* + * Calculate the number of pages we require to + * store the bootmem bitmaps. + */ + np->bootmap_pages = bootmem_bootmap_pages(np->end - np->start); + + /* + * This doesn't seem to be used by the Linux memory + * manager any more. If we can get rid of it, we + * also get rid of some of the stuff above as well. + */ + max_low_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET); + max_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET); + mi->end = memend_pfn << PAGE_SHIFT; + +} + +/* + * Initialise the bootmem allocator for all nodes. This is called + * early during the architecture specific initialisation. + */ +void __init bootmem_init(struct meminfo *mi) +{ + struct node_info node_info; + unsigned int bootmap_pfn; + pg_data_t *pgdat = NODE_DATA(0); + + find_memend_and_nodes(mi, &node_info); + + bootmap_pfn = find_bootmap_pfn(mi, node_info.bootmap_pages); + + /* + * Note that node 0 must always have some pages. + */ + if (node_info.end == 0) + BUG(); + + /* + * Initialise the bootmem allocator. + */ + init_bootmem_node(pgdat, bootmap_pfn, node_info.start, node_info.end); + + /* + * Register all available RAM in this node with the bootmem allocator. + */ + free_bootmem_node(pgdat, mi->bank->start, mi->bank->size); + + /* + * Register the kernel text and data with bootmem. + * Note: with XIP we dont register .text since + * its in ROM. + */ +#ifdef CONFIG_XIP_KERNEL + reserve_bootmem_node(pgdat, __pa(&_sdata), &_end - &_sdata); +#else + reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext); +#endif + + /* + * And don't forget to reserve the allocator bitmap, + * which will be freed later. + */ + reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT, + node_info.bootmap_pages << PAGE_SHIFT); + + /* + * These should likewise go elsewhere. They pre-reserve + * the screen memory region at the start of main system + * memory. FIXME - screen RAM is not 512K! + */ + reserve_bootmem_node(pgdat, 0x02000000, 0x00080000); + +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = phys_initrd_start; + initrd_end = initrd_start + phys_initrd_size; + + /* Achimedes machines only have one node, so initrd is in node 0 */ +#ifdef CONFIG_XIP_KERNEL + /* Only reserve initrd space if it is in RAM */ + if(initrd_start && initrd_start < 0x03000000){ +#else + if(initrd_start){ +#endif + reserve_bootmem_node(pgdat, __pa(initrd_start), + initrd_end - initrd_start); + } +#endif /* CONFIG_BLK_DEV_INITRD */ + + +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(struct meminfo *mi) +{ + void *zero_page; + unsigned long zone_size[MAX_NR_ZONES]; + unsigned long zhole_size[MAX_NR_ZONES]; + struct bootmem_data *bdata; + pg_data_t *pgdat; + int i; + + memcpy(&meminfo, mi, sizeof(meminfo)); + + /* + * allocate the zero page. Note that we count on this going ok. + */ + zero_page = alloc_bootmem_low_pages(PAGE_SIZE); + + /* + * initialise the page tables. + */ + memtable_init(mi); + flush_tlb_all(); + + /* + * initialise the zones in node 0 (archimedes have only 1 node) + */ + + for (i = 0; i < MAX_NR_ZONES; i++) { + zone_size[i] = 0; + zhole_size[i] = 0; + } + + pgdat = NODE_DATA(0); + bdata = pgdat->bdata; + zone_size[0] = bdata->node_low_pfn - + (bdata->node_boot_start >> PAGE_SHIFT); + if (!zone_size[0]) + BUG(); + pgdat->node_mem_map = NULL; + free_area_init_node(0, pgdat, zone_size, + bdata->node_boot_start >> PAGE_SHIFT, zhole_size); + + /* + * finish off the bad pages once + * the mem_map is initialised + */ + memzero(zero_page, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); +} + +static inline void free_area(unsigned long addr, unsigned long end, char *s) +{ + unsigned int size = (end - addr) >> 10; + + for (; addr < end; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + ClearPageReserved(page); + set_page_count(page, 1); + free_page(addr); + totalram_pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); +} + +/* + * mem_init() marks the free areas in the mem_map and tells us how much + * memory is free. This is done after various parts of the system have + * claimed their memory after the kernel image. + */ +void __init mem_init(void) +{ + unsigned int codepages, datapages, initpages; + pg_data_t *pgdat = NODE_DATA(0); + extern int sysctl_overcommit_memory; + + + /* Note: data pages includes BSS */ +#ifdef CONFIG_XIP_KERNEL + codepages = &_endtext - &_text; + datapages = &_end - &_sdata; +#else + codepages = &_etext - &_text; + datapages = &_end - &_etext; +#endif + initpages = &__init_end - &__init_begin; + + high_memory = (void *)__va(meminfo.end); + max_mapnr = virt_to_page(high_memory) - mem_map; + + /* this will put all unused low memory onto the freelists */ + if (pgdat->node_spanned_pages != 0) + totalram_pages += free_all_bootmem_node(pgdat); + + num_physpages = meminfo.bank[0].size >> PAGE_SHIFT; + + printk(KERN_INFO "Memory: %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); + printk(KERN_NOTICE "Memory: %luKB available (%dK code, " + "%dK data, %dK init)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + codepages >> 10, datapages >> 10, initpages >> 10); + + /* + * Turn on overcommit on tiny machines + */ + if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; + printk("Turning on overcommit\n"); + } +} + +void free_initmem(void){ +#ifndef CONFIG_XIP_KERNEL + free_area((unsigned long)(&__init_begin), + (unsigned long)(&__init_end), + "init"); +#endif +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_XIP_KERNEL + /* Only bin initrd if it is in RAM... */ + if(!keep_initrd && start < 0x03000000) +#else + if (!keep_initrd) +#endif + free_area(start, end, "initrd"); +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c new file mode 100644 index 000000000000..8e8a2bb2487d --- /dev/null +++ b/arch/arm26/mm/memc.c @@ -0,0 +1,202 @@ +/* + * linux/arch/arm26/mm/memc.c + * + * Copyright (C) 1998-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Page table sludge for older ARM processor architectures. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/memory.h> +#include <asm/hardware.h> + +#include <asm/map.h> + +#define MEMC_TABLE_SIZE (256*sizeof(unsigned long)) + +kmem_cache_t *pte_cache, *pgd_cache; +int page_nr; + +/* + * Allocate space for a page table and a MEMC table. + * Note that we place the MEMC + * table before the page directory. This means we can + * easily get to both tightly-associated data structures + * with a single pointer. + */ +static inline pgd_t *alloc_pgd_table(void) +{ + void *pg2k = kmem_cache_alloc(pgd_cache, GFP_KERNEL); + + if (pg2k) + pg2k += MEMC_TABLE_SIZE; + + return (pgd_t *)pg2k; +} + +/* + * Free a page table. this function is the counterpart to get_pgd_slow + * below, not alloc_pgd_table above. + */ +void free_pgd_slow(pgd_t *pgd) +{ + unsigned long tbl = (unsigned long)pgd; + + tbl -= MEMC_TABLE_SIZE; + + kmem_cache_free(pgd_cache, (void *)tbl); +} + +/* + * Allocate a new pgd and fill it in ready for use + * + * A new tasks pgd is completely empty (all pages !present) except for: + * + * o The machine vectors at virtual address 0x0 + * o The vmalloc region at the top of address space + * + */ +#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) + +pgd_t *get_pgd_slow(struct mm_struct *mm) +{ + pgd_t *new_pgd, *init_pgd; + pmd_t *new_pmd, *init_pmd; + pte_t *new_pte, *init_pte; + + new_pgd = alloc_pgd_table(); + if (!new_pgd) + goto no_pgd; + + /* + * This lock is here just to satisfy pmd_alloc and pte_lock + * FIXME: I bet we could avoid taking it pretty much altogether + */ + spin_lock(&mm->page_table_lock); + + /* + * On ARM, first page must always be allocated since it contains + * the machine vectors. + */ + new_pmd = pmd_alloc(mm, new_pgd, 0); + if (!new_pmd) + goto no_pmd; + + new_pte = pte_alloc_kernel(mm, new_pmd, 0); + if (!new_pte) + goto no_pte; + + init_pgd = pgd_offset(&init_mm, 0); + init_pmd = pmd_offset(init_pgd, 0); + init_pte = pte_offset(init_pmd, 0); + + set_pte(new_pte, *init_pte); + + /* + * the page table entries are zeroed + * when the table is created. (see the cache_ctor functions below) + * Now we need to plonk the kernel (vmalloc) area at the end of + * the address space. We copy this from the init thread, just like + * the init_pte we copied above... + */ + memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, + (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); + + spin_unlock(&mm->page_table_lock); + + /* update MEMC tables */ + cpu_memc_update_all(new_pgd); + return new_pgd; + +no_pte: + spin_unlock(&mm->page_table_lock); + pmd_free(new_pmd); + free_pgd_slow(new_pgd); + return NULL; + +no_pmd: + spin_unlock(&mm->page_table_lock); + free_pgd_slow(new_pgd); + return NULL; + +no_pgd: + return NULL; +} + +/* + * No special code is required here. + */ +void setup_mm_for_reboot(char mode) +{ +} + +/* + * This contains the code to setup the memory map on an ARM2/ARM250/ARM3 + * o swapper_pg_dir = 0x0207d000 + * o kernel proper starts at 0x0208000 + * o create (allocate) a pte to contain the machine vectors + * o populate the pte (points to 0x02078000) (FIXME - is it zeroed?) + * o populate the init tasks page directory (pgd) with the new pte + * o zero the rest of the init tasks pgdir (FIXME - what about vmalloc?!) + */ +void __init memtable_init(struct meminfo *mi) +{ + pte_t *pte; + int i; + + page_nr = max_low_pfn; + + pte = alloc_bootmem_low_pages(PTRS_PER_PTE * sizeof(pte_t)); + pte[0] = mk_pte_phys(PAGE_OFFSET + SCREEN_SIZE, PAGE_READONLY); + pmd_populate(&init_mm, pmd_offset(swapper_pg_dir, 0), pte); + + for (i = 1; i < PTRS_PER_PGD; i++) + pgd_val(swapper_pg_dir[i]) = 0; +} + +void __init iotable_init(struct map_desc *io_desc) +{ + /* nothing to do */ +} + +/* + * We never have holes in the memmap + */ +void __init create_memmap_holes(struct meminfo *mi) +{ +} + +static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +{ + memzero(pte, sizeof(pte_t) * PTRS_PER_PTE); +} + +static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags) +{ + memzero(pgd + MEMC_TABLE_SIZE, USER_PTRS_PER_PGD * sizeof(pgd_t)); +} + +void __init pgtable_cache_init(void) +{ + pte_cache = kmem_cache_create("pte-cache", + sizeof(pte_t) * PTRS_PER_PTE, + 0, 0, pte_cache_ctor, NULL); + if (!pte_cache) + BUG(); + + pgd_cache = kmem_cache_create("pgd-cache", MEMC_TABLE_SIZE + + sizeof(pgd_t) * PTRS_PER_PGD, + 0, 0, pgd_cache_ctor, NULL); + if (!pgd_cache) + BUG(); +} diff --git a/arch/arm26/mm/proc-funcs.S b/arch/arm26/mm/proc-funcs.S new file mode 100644 index 000000000000..c3d4cd3f457e --- /dev/null +++ b/arch/arm26/mm/proc-funcs.S @@ -0,0 +1,359 @@ +/* + * linux/arch/arm26/mm/proc-arm2,3.S + * + * Copyright (C) 1997-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for ARM2,3 + * + * These are the low level assembler for performing cache + * and memory functions on ARM2, ARM250 and ARM3 processors. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm_offsets.h> +#include <asm/procinfo.h> +#include <asm/ptrace.h> + +/* + * MEMC workhorse code. It's both a horse which things it's a pig. + */ +/* + * Function: cpu_memc_update_entry(pgd_t *pgd, unsigned long phys_pte, unsigned long addr) + * Params : pgd Page tables/MEMC mapping + * : phys_pte physical address, or PTE + * : addr virtual address + */ +ENTRY(cpu_memc_update_entry) + tst r1, #PAGE_PRESENT @ is the page present + orreq r1, r1, #PAGE_OLD | PAGE_CLEAN + moveq r2, #0x01f00000 + mov r3, r1, lsr #13 @ convert to physical page nr + and r3, r3, #0x3fc + adr ip, memc_phys_table_32 + ldr r3, [ip, r3] + tst r1, #PAGE_OLD | PAGE_NOT_USER + biceq r3, r3, #0x200 + tsteq r1, #PAGE_READONLY | PAGE_CLEAN + biceq r3, r3, #0x300 + mov r2, r2, lsr #15 @ virtual -> nr + orr r3, r3, r2, lsl #15 + and r2, r2, #0x300 + orr r3, r3, r2, lsl #2 + and r2, r3, #255 + sub r0, r0, #256 * 4 + str r3, [r0, r2, lsl #2] + strb r3, [r3] + movs pc, lr +/* + * Params : r0 = preserved + * : r1 = memc table base (preserved) + * : r2 = page table entry + * : r3 = preserved + * : r4 = unused + * : r5 = memc physical address translation table + * : ip = virtual address (preserved) + */ +update_pte: + mov r4, r2, lsr #13 + and r4, r4, #0x3fc + ldr r4, [r5, r4] @ covert to MEMC page + + tst r2, #PAGE_OLD | PAGE_NOT_USER @ check for MEMC read + biceq r4, r4, #0x200 + tsteq r2, #PAGE_READONLY | PAGE_CLEAN @ check for MEMC write + biceq r4, r4, #0x300 + + orr r4, r4, ip + and r2, ip, #0x01800000 + orr r4, r4, r2, lsr #13 + + and r2, r4, #255 + str r4, [r1, r2, lsl #2] + movs pc, lr + +/* + * Params : r0 = preserved + * : r1 = memc table base (preserved) + * : r2 = page table base + * : r3 = preserved + * : r4 = unused + * : r5 = memc physical address translation table + * : ip = virtual address (updated) + */ +update_pte_table: + stmfd sp!, {r0, lr} + bic r0, r2, #3 +1: ldr r2, [r0], #4 @ get entry + tst r2, #PAGE_PRESENT @ page present + blne update_pte @ process pte + add ip, ip, #32768 @ increment virt addr + ldr r2, [r0], #4 @ get entry + tst r2, #PAGE_PRESENT @ page present + blne update_pte @ process pte + add ip, ip, #32768 @ increment virt addr + ldr r2, [r0], #4 @ get entry + tst r2, #PAGE_PRESENT @ page present + blne update_pte @ process pte + add ip, ip, #32768 @ increment virt addr + ldr r2, [r0], #4 @ get entry + tst r2, #PAGE_PRESENT @ page present + blne update_pte @ process pte + add ip, ip, #32768 @ increment virt addr + tst ip, #32768 * 31 @ finished? + bne 1b + ldmfd sp!, {r0, pc}^ + +/* + * Function: cpu_memc_update_all(pgd_t *pgd) + * Params : pgd Page tables/MEMC mapping + * Notes : this is optimised for 32k pages + */ +ENTRY(cpu_memc_update_all) + stmfd sp!, {r4, r5, lr} + bl clear_tables + sub r1, r0, #256 * 4 @ start of MEMC tables + adr r5, memc_phys_table_32 @ Convert to logical page number + mov ip, #0 @ virtual address +1: ldmia r0!, {r2, r3} @ load two pgd entries + tst r2, #PAGE_PRESENT @ is pgd entry present? + addeq ip, ip, #1048576 @FIXME - PAGE_PRESENT is for PTEs technically... + blne update_pte_table + mov r2, r3 + tst r2, #PAGE_PRESENT @ is pgd entry present? + addeq ip, ip, #1048576 + blne update_pte_table + teq ip, #32 * 1048576 + bne 1b + ldmfd sp!, {r4, r5, pc}^ + +/* + * Build the table to map from physical page number to memc page number + */ + .type memc_phys_table_32, #object +memc_phys_table_32: + .irp b7, 0x00, 0x80 + .irp b6, 0x00, 0x02 + .irp b5, 0x00, 0x04 + .irp b4, 0x00, 0x01 + + .irp b3, 0x00, 0x40 + .irp b2, 0x00, 0x20 + .irp b1, 0x00, 0x10 + .irp b0, 0x00, 0x08 + .long 0x03800300 + \b7 + \b6 + \b5 + \b4 + \b3 + \b2 + \b1 + \b0 + .endr + .endr + .endr + .endr + + .endr + .endr + .endr + .endr + .size memc_phys_table_32, . - memc_phys_table_32 + +/* + * helper for cpu_memc_update_all, this clears out all + * mappings, setting them close to the top of memory, + * and inaccessible (0x01f00000). + * Params : r0 = page table pointer + */ +clear_tables: ldr r1, _arm3_set_pgd - 4 + ldr r2, [r1] + sub r1, r0, #256 * 4 @ start of MEMC tables + add r2, r1, r2, lsl #2 @ end of tables + mov r3, #0x03f00000 @ Default mapping (null mapping) + orr r3, r3, #0x00000f00 + orr r4, r3, #1 + orr r5, r3, #2 + orr ip, r3, #3 +1: stmia r1!, {r3, r4, r5, ip} + add r3, r3, #4 + add r4, r4, #4 + add r5, r5, #4 + add ip, ip, #4 + stmia r1!, {r3, r4, r5, ip} + add r3, r3, #4 + add r4, r4, #4 + add r5, r5, #4 + add ip, ip, #4 + teq r1, r2 + bne 1b + mov pc, lr + +/* + * Function: *_set_pgd(pgd_t *pgd) + * Params : pgd New page tables/MEMC mapping + * Purpose : update MEMC hardware with new mapping + */ + .word page_nr @ extern - declared in mm-memc.c +_arm3_set_pgd: mcr p15, 0, r1, c1, c0, 0 @ flush cache +_arm2_set_pgd: stmfd sp!, {lr} + ldr r1, _arm3_set_pgd - 4 + ldr r2, [r1] + sub r0, r0, #256 * 4 @ start of MEMC tables + add r1, r0, r2, lsl #2 @ end of tables +1: ldmia r0!, {r2, r3, ip, lr} + strb r2, [r2] + strb r3, [r3] + strb ip, [ip] + strb lr, [lr] + ldmia r0!, {r2, r3, ip, lr} + strb r2, [r2] + strb r3, [r3] + strb ip, [ip] + strb lr, [lr] + teq r0, r1 + bne 1b + ldmfd sp!, {pc}^ + +/* + * Function: *_proc_init (void) + * Purpose : Initialise the cache control registers + */ +_arm3_proc_init: + mov r0, #0x001f0000 + orr r0, r0, #0x0000ff00 + orr r0, r0, #0x000000ff + mcr p15, 0, r0, c3, c0 @ ARM3 Cacheable + mcr p15, 0, r0, c4, c0 @ ARM3 Updateable + mov r0, #0 + mcr p15, 0, r0, c5, c0 @ ARM3 Disruptive + mcr p15, 0, r0, c1, c0 @ ARM3 Flush + mov r0, #3 + mcr p15, 0, r0, c2, c0 @ ARM3 Control +_arm2_proc_init: + movs pc, lr + +/* + * Function: *_proc_fin (void) + * Purpose : Finalise processor (disable caches) + */ +_arm3_proc_fin: mov r0, #2 + mcr p15, 0, r0, c2, c0 +_arm2_proc_fin: orrs pc, lr, #PSR_I_BIT|PSR_F_BIT + +/* + * Function: *_xchg_1 (int new, volatile void *ptr) + * Params : new New value to store at... + * : ptr pointer to byte-wide location + * Purpose : Performs an exchange operation + * Returns : Original byte data at 'ptr' + */ +_arm2_xchg_1: mov r2, pc + orr r2, r2, #PSR_I_BIT + teqp r2, #0 + ldrb r2, [r1] + strb r0, [r1] + mov r0, r2 + movs pc, lr + +_arm3_xchg_1: swpb r0, r0, [r1] + movs pc, lr + +/* + * Function: *_xchg_4 (int new, volatile void *ptr) + * Params : new New value to store at... + * : ptr pointer to word-wide location + * Purpose : Performs an exchange operation + * Returns : Original word data at 'ptr' + */ +_arm2_xchg_4: mov r2, pc + orr r2, r2, #PSR_I_BIT + teqp r2, #0 + ldr r2, [r1] + str r0, [r1] + mov r0, r2 + movs pc, lr + +_arm3_xchg_4: swp r0, r0, [r1] + movs pc, lr + +_arm2_3_check_bugs: + bics pc, lr, #PSR_F_BIT @ Clear FIQ disable bit + +armvlsi_name: .asciz "ARM/VLSI" +_arm2_name: .asciz "ARM 2" +_arm250_name: .asciz "ARM 250" +_arm3_name: .asciz "ARM 3" + + .section ".init.text", #alloc, #execinstr +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .globl arm2_processor_functions +arm2_processor_functions: + .word _arm2_3_check_bugs + .word _arm2_proc_init + .word _arm2_proc_fin + .word _arm2_set_pgd + .word _arm2_xchg_1 + .word _arm2_xchg_4 + +cpu_arm2_info: + .long armvlsi_name + .long _arm2_name + + .globl arm250_processor_functions +arm250_processor_functions: + .word _arm2_3_check_bugs + .word _arm2_proc_init + .word _arm2_proc_fin + .word _arm2_set_pgd + .word _arm3_xchg_1 + .word _arm3_xchg_4 + +cpu_arm250_info: + .long armvlsi_name + .long _arm250_name + + .globl arm3_processor_functions +arm3_processor_functions: + .word _arm2_3_check_bugs + .word _arm3_proc_init + .word _arm3_proc_fin + .word _arm3_set_pgd + .word _arm3_xchg_1 + .word _arm3_xchg_4 + +cpu_arm3_info: + .long armvlsi_name + .long _arm3_name + +arm2_arch_name: .asciz "armv1" +arm3_arch_name: .asciz "armv2" +arm2_elf_name: .asciz "v1" +arm3_elf_name: .asciz "v2" + .align + + .section ".proc.info", #alloc, #execinstr + + .long 0x41560200 + .long 0xfffffff0 + .long arm2_arch_name + .long arm2_elf_name + .long 0 + .long cpu_arm2_info + .long arm2_processor_functions + + .long 0x41560250 + .long 0xfffffff0 + .long arm3_arch_name + .long arm3_elf_name + .long 0 + .long cpu_arm250_info + .long arm250_processor_functions + + .long 0x41560300 + .long 0xfffffff0 + .long arm3_arch_name + .long arm3_elf_name + .long 0 + .long cpu_arm3_info + .long arm3_processor_functions + diff --git a/arch/arm26/mm/small_page.c b/arch/arm26/mm/small_page.c new file mode 100644 index 000000000000..77be86cca789 --- /dev/null +++ b/arch/arm26/mm/small_page.c @@ -0,0 +1,194 @@ +/* + * linux/arch/arm26/mm/small_page.c + * + * Copyright (C) 1996 Russell King + * Copyright (C) 2003 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Changelog: + * 26/01/1996 RMK Cleaned up various areas to make little more generic + * 07/02/1999 RMK Support added for 16K and 32K page sizes + * containing 8K blocks + * 23/05/2004 IM Fixed to use struct page->lru (thanks wli) + * + */ +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/bitops.h> + +#include <asm/pgtable.h> + +#define PEDANTIC + +/* + * Requirement: + * We need to be able to allocate naturally aligned memory of finer + * granularity than the page size. This is typically used for the + * second level page tables on 32-bit ARMs. + * + * FIXME - this comment is *out of date* + * Theory: + * We "misuse" the Linux memory management system. We use alloc_page + * to allocate a page and then mark it as reserved. The Linux memory + * management system will then ignore the "offset", "next_hash" and + * "pprev_hash" entries in the mem_map for this page. + * + * We then use a bitstring in the "offset" field to mark which segments + * of the page are in use, and manipulate this as required during the + * allocation and freeing of these small pages. + * + * We also maintain a queue of pages being used for this purpose using + * the "next_hash" and "pprev_hash" entries of mem_map; + */ + +struct order { + struct list_head queue; + unsigned int mask; /* (1 << shift) - 1 */ + unsigned int shift; /* (1 << shift) size of page */ + unsigned int block_mask; /* nr_blocks - 1 */ + unsigned int all_used; /* (1 << nr_blocks) - 1 */ +}; + + +static struct order orders[] = { +#if PAGE_SIZE == 32768 + { LIST_HEAD_INIT(orders[0].queue), 2047, 11, 15, 0x0000ffff }, + { LIST_HEAD_INIT(orders[1].queue), 8191, 13, 3, 0x0000000f } +#else +#error unsupported page size (ARGH!) +#endif +}; + +#define USED_MAP(pg) ((pg)->index) +#define TEST_AND_CLEAR_USED(pg,off) (test_and_clear_bit(off, &USED_MAP(pg))) +#define SET_USED(pg,off) (set_bit(off, &USED_MAP(pg))) + +static DEFINE_SPINLOCK(small_page_lock); + +static unsigned long __get_small_page(int priority, struct order *order) +{ + unsigned long flags; + struct page *page; + int offset; + + do { + spin_lock_irqsave(&small_page_lock, flags); + + if (list_empty(&order->queue)) + goto need_new_page; + + page = list_entry(order->queue.next, struct page, lru); +again: +#ifdef PEDANTIC + if (USED_MAP(page) & ~order->all_used) + PAGE_BUG(page); +#endif + offset = ffz(USED_MAP(page)); + SET_USED(page, offset); + if (USED_MAP(page) == order->all_used) + list_del_init(&page->lru); + spin_unlock_irqrestore(&small_page_lock, flags); + + return (unsigned long) page_address(page) + (offset << order->shift); + +need_new_page: + spin_unlock_irqrestore(&small_page_lock, flags); + page = alloc_page(priority); + spin_lock_irqsave(&small_page_lock, flags); + + if (list_empty(&order->queue)) { + if (!page) + goto no_page; + SetPageReserved(page); + USED_MAP(page) = 0; + list_add(&page->lru, &order->queue); + goto again; + } + + spin_unlock_irqrestore(&small_page_lock, flags); + __free_page(page); + } while (1); + +no_page: + spin_unlock_irqrestore(&small_page_lock, flags); + return 0; +} + +static void __free_small_page(unsigned long spage, struct order *order) +{ + unsigned long flags; + struct page *page; + + if (virt_addr_valid(spage)) { + page = virt_to_page(spage); + + /* + * The container-page must be marked Reserved + */ + if (!PageReserved(page) || spage & order->mask) + goto non_small; + +#ifdef PEDANTIC + if (USED_MAP(page) & ~order->all_used) + PAGE_BUG(page); +#endif + + spage = spage >> order->shift; + spage &= order->block_mask; + + /* + * the following must be atomic wrt get_page + */ + spin_lock_irqsave(&small_page_lock, flags); + + if (USED_MAP(page) == order->all_used) + list_add(&page->lru, &order->queue); + + if (!TEST_AND_CLEAR_USED(page, spage)) + goto already_free; + + if (USED_MAP(page) == 0) + goto free_page; + + spin_unlock_irqrestore(&small_page_lock, flags); + } + return; + +free_page: + /* + * unlink the page from the small page queue and free it + */ + list_del_init(&page->lru); + spin_unlock_irqrestore(&small_page_lock, flags); + ClearPageReserved(page); + __free_page(page); + return; + +non_small: + printk("Trying to free non-small page from %p\n", __builtin_return_address(0)); + return; +already_free: + printk("Trying to free free small page from %p\n", __builtin_return_address(0)); +} + +unsigned long get_page_8k(int priority) +{ + return __get_small_page(priority, orders+1); +} + +void free_page_8k(unsigned long spage) +{ + __free_small_page(spage, orders+1); +} diff --git a/arch/arm26/nwfpe/ARM-gcc.h b/arch/arm26/nwfpe/ARM-gcc.h new file mode 100644 index 000000000000..e6598470b076 --- /dev/null +++ b/arch/arm26/nwfpe/ARM-gcc.h @@ -0,0 +1,120 @@ +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. +------------------------------------------------------------------------------- +*/ +typedef char flag; +typedef unsigned char uint8; +typedef signed char int8; +typedef int uint16; +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementation of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively. +------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and if +necessary ``marks'' the literal as having a 64-bit integer type. For +example, the Gnu C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'. +------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##LL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'. +------------------------------------------------------------------------------- +*/ +#define INLINE extern __inline__ + + +/* For use as a GCC soft-float library we need some special function names. */ + +#ifdef __LIBFLOAT__ + +/* Some 32-bit ops can be mapped straight across by just changing the name. */ +#define float32_add __addsf3 +#define float32_sub __subsf3 +#define float32_mul __mulsf3 +#define float32_div __divsf3 +#define int32_to_float32 __floatsisf +#define float32_to_int32_round_to_zero __fixsfsi +#define float32_to_uint32_round_to_zero __fixunssfsi + +/* These ones go through the glue code. To avoid namespace pollution + we rename the internal functions too. */ +#define float32_eq ___float32_eq +#define float32_le ___float32_le +#define float32_lt ___float32_lt + +/* All the 64-bit ops have to go through the glue, so we pull the same + trick. */ +#define float64_add ___float64_add +#define float64_sub ___float64_sub +#define float64_mul ___float64_mul +#define float64_div ___float64_div +#define int32_to_float64 ___int32_to_float64 +#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero +#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero +#define float64_to_float32 ___float64_to_float32 +#define float32_to_float64 ___float32_to_float64 +#define float64_eq ___float64_eq +#define float64_le ___float64_le +#define float64_lt ___float64_lt + +#if 0 +#define float64_add __adddf3 +#define float64_sub __subdf3 +#define float64_mul __muldf3 +#define float64_div __divdf3 +#define int32_to_float64 __floatsidf +#define float64_to_int32_round_to_zero __fixdfsi +#define float64_to_uint32_round_to_zero __fixunsdfsi +#define float64_to_float32 __truncdfsf2 +#define float32_to_float64 __extendsfdf2 +#endif + +#endif diff --git a/arch/arm26/nwfpe/ChangeLog b/arch/arm26/nwfpe/ChangeLog new file mode 100644 index 000000000000..0c580f764baf --- /dev/null +++ b/arch/arm26/nwfpe/ChangeLog @@ -0,0 +1,83 @@ +2002-01-19 Russell King <rmk@arm.linux.org.uk> + + * fpa11.h - Add documentation + - remove userRegisters pointer from this structure. + - add new method to obtain integer register values. + * softfloat.c - Remove float128 + * softfloat.h - Remove float128 + * softfloat-specialize - Remove float128 + + * The FPA11 structure is not a kernel-specific data structure. + It is used by users of ptrace to examine the values of the + floating point registers. Therefore, any changes to the + FPA11 structure (size or position of elements contained + within) have to be well thought out. + + * Since 128-bit float requires the FPA11 structure to change + size, it has been removed. 128-bit float is currently unused, + and needs various things to be re-worked so that we won't + overflow the available space in the task structure. + + * The changes are designed to break any patch that goes on top + of this code, so that the authors properly review their changes. + +1999-08-19 Scott Bambrough <scottb@netwinder.org> + + * fpmodule.c - Changed version number to 0.95 + * fpa11.h - modified FPA11, FPREG structures + * fpa11.c - Changes due to FPA11, FPREG structure alterations. + * fpa11_cpdo.c - Changes due to FPA11, FPREG structure alterations. + * fpa11_cpdt.c - Changes due to FPA11, FPREG structure alterations. + * fpa11_cprt.c - Changes due to FPA11, FPREG structure alterations. + * single_cpdo.c - Changes due to FPA11, FPREG structure alterations. + * double_cpdo.c - Changes due to FPA11, FPREG structure alterations. + * extended_cpdo.c - Changes due to FPA11, FPREG structure alterations. + + * I discovered several bugs. First and worst is that the kernel + passes in a pointer to the FPE's state area. This is defined + as a struct user_fp (see user.h). This pointer was cast to a + FPA11*. Unfortunately FPA11 and user_fp are of different sizes; + user_fp is smaller. This meant that the FPE scribbled on things + below its area, which is bad, as the area is in the thread_struct + embedded in the process task structure. Thus we were scribbling + over one of the most important structures in the entire OS. + + * user_fp and FPA11 have now been harmonized. Most of the changes + in the above code were dereferencing problems due to moving the + register type out of FPREG, and getting rid of the union variable + fpvalue. + + * Second I noticed resetFPA11 was not always being called for a + task. This should happen on the first floating point exception + that occurs. It is controlled by init_flag in FPA11. The + comment in the code beside init_flag state the kernel guarantees + this to be zero. Not so. I found that the kernel recycles task + structures, and that recycled ones may not have init_flag zeroed. + I couldn't even find anything that guarantees it is zeroed when + when the task structure is initially allocated. In any case + I now initialize the entire FPE state in the thread structure to + zero when allocated and recycled. See alloc_task_struct() and + flush_thread() in arch/arm/process.c. The change to + alloc_task_struct() may not be necessary, but I left it in for + completeness (better safe than sorry). + +1998-11-23 Scott Bambrough <scottb@netwinder.org> + + * README.FPE - fix typo in description of lfm/sfm instructions + * NOTES - Added file to describe known bugs/problems + * fpmodule.c - Changed version number to 0.94 + +1998-11-20 Scott Bambrough <scottb@netwinder.org> + + * README.FPE - fix description of URD, NRM instructions + * TODO - remove URD, NRM instructions from TODO list + * single_cpdo.c - implement URD, NRM + * double_cpdo.c - implement URD, NRM + * extended_cpdo.c - implement URD, NRM + +1998-11-19 Scott Bambrough <scottb@netwinder.org> + + * ChangeLog - Added this file to track changes made. + * fpa11.c - added code to initialize register types to typeNone + * fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to + typeDouble in switch statement) diff --git a/arch/arm26/nwfpe/Makefile b/arch/arm26/nwfpe/Makefile new file mode 100644 index 000000000000..b39d34dff054 --- /dev/null +++ b/arch/arm26/nwfpe/Makefile @@ -0,0 +1,15 @@ +# +# Copyright (C) 1998, 1999, 2001 Philip Blundell +# + +obj-y := +obj-m := +obj-n := + +obj-$(CONFIG_FPE_NWFPE) += nwfpe.o + +nwfpe-objs := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \ + fpmodule.o fpopcode.o softfloat.o \ + single_cpdo.o double_cpdo.o extended_cpdo.o \ + entry.o + diff --git a/arch/arm26/nwfpe/double_cpdo.c b/arch/arm26/nwfpe/double_cpdo.c new file mode 100644 index 000000000000..7f4fef0216c7 --- /dev/null +++ b/arch/arm26/nwfpe/double_cpdo.c @@ -0,0 +1,288 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "softfloat.h" +#include "fpopcode.h" + +float64 float64_exp(float64 Fm); +float64 float64_ln(float64 Fm); +float64 float64_sin(float64 rFm); +float64 float64_cos(float64 rFm); +float64 float64_arcsin(float64 rFm); +float64 float64_arctan(float64 rFm); +float64 float64_log(float64 rFm); +float64 float64_tan(float64 rFm); +float64 float64_arccos(float64 rFm); +float64 float64_pow(float64 rFn,float64 rFm); +float64 float64_pol(float64 rFn,float64 rFm); + +unsigned int DoubleCPDO(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + float64 rFm, rFn = 0; //FIXME - should be zero? + unsigned int Fd, Fm, Fn, nRc = 1; + + //printk("DoubleCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getDoubleConstant(Fm); + } + else + { + switch (fpa11->fType[Fm]) + { + case typeSingle: + rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle); + break; + + case typeDouble: + rFm = fpa11->fpreg[Fm].fDouble; + break; + + case typeExtended: + // !! patb + //printk("not implemented! why not?\n"); + //!! ScottB + // should never get here, if extended involved + // then other operand should be promoted then + // ExtendedCPDO called. + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fType[Fn]) + { + case typeSingle: + rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle); + break; + + case typeDouble: + rFn = fpa11->fpreg[Fn].fDouble; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */ + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fDouble = float64_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fDouble = float64_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fDouble = float64_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fDouble = rFm; + break; + + case MNF_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] ^= 0x80000000; + fpa11->fpreg[Fd].fDouble = rFm; + } + break; + + case ABS_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] &= 0x7fffffff; + fpa11->fpreg[Fd].fDouble = rFm; + } + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fDouble = float64_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fDouble = float64_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fDouble = float64_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fDouble = float64_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fDouble = float64_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fDouble = float64_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fDouble = float64_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fDouble = float64_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fDouble = float64_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fType[Fd] = typeDouble; + return nRc; +} + +#if 0 +float64 float64_exp(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_ln(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_sin(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_cos(float64 rFm) +{ + return rFm; + //series +} + +#if 0 +float64 float64_arcsin(float64 rFm) +{ +//series +} + +float64 float64_arctan(float64 rFm) +{ + //series +} +#endif + +float64 float64_log(float64 rFm) +{ + return float64_div(float64_ln(rFm),getDoubleConstant(7)); +} + +float64 float64_tan(float64 rFm) +{ + return float64_div(float64_sin(rFm),float64_cos(rFm)); +} + +float64 float64_arccos(float64 rFm) +{ +return rFm; + //return float64_sub(halfPi,float64_arcsin(rFm)); +} + +float64 float64_pow(float64 rFn,float64 rFm) +{ + return float64_exp(float64_mul(rFm,float64_ln(rFn))); +} + +float64 float64_pol(float64 rFn,float64 rFm) +{ + return float64_arctan(float64_div(rFn,rFm)); +} +#endif diff --git a/arch/arm26/nwfpe/entry.S b/arch/arm26/nwfpe/entry.S new file mode 100644 index 000000000000..7d6dfaad80c2 --- /dev/null +++ b/arch/arm26/nwfpe/entry.S @@ -0,0 +1,114 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998 + (c) Philip Blundell 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include <asm/asm_offsets.h> + +/* This is the kernel's entry point into the floating point emulator. +It is called from the kernel with code similar to this: + + mov fp, #0 + teqp pc, #PSR_I_BIT | MODE_SVC + ldr r4, .LC2 + ldr pc, [r4] @ Call FP module USR entry point + +The kernel expects the emulator to return via one of two possible +points of return it passes to the emulator. The emulator, if +successful in its emulation, jumps to ret_from_exception and the +kernel takes care of returning control from the trap to the user code. +If the emulator is unable to emulate the instruction, it returns to +fpundefinstr and the kernel halts the user program with a core dump. + +This routine does four things: + +1) It saves SP into a variable called userRegisters. The kernel has +created a struct pt_regs on the stack and saved the user registers +into it. See /usr/include/asm/proc/ptrace.h for details. The +emulator code uses userRegisters as the base of an array of words from +which the contents of the registers can be extracted. + +2) It locates the FP emulator work area within the TSS structure and +points `fpa11' to it. + +3) It calls EmulateAll to emulate a floating point instruction. +EmulateAll returns 1 if the emulation was successful, or 0 if not. + +4) If an instruction has been emulated successfully, it looks ahead at +the next instruction. If it is a floating point instruction, it +executes the instruction, without returning to user space. In this +way it repeatedly looks ahead and executes floating point instructions +until it encounters a non floating point instruction, at which time it +returns via _fpreturn. + +This is done to reduce the effect of the trap overhead on each +floating point instructions. GCC attempts to group floating point +instructions to allow the emulator to spread the cost of the trap over +several floating point instructions. */ + + .globl nwfpe_enter +nwfpe_enter: + mov sl, sp + bl FPA11_CheckInit @ check to see if we are initialised + + ldr r5, [sp, #60] @ get contents of PC + bic r5, r5, #0xfc000003 + ldr r0, [r5, #-4] @ get actual instruction into r0 + bl EmulateAll @ emulate the instruction +1: cmp r0, #0 @ was emulation successful + beq fpundefinstr @ no, return failure + +next: +.Lx1: ldrt r6, [r5], #4 @ get the next instruction and + @ increment PC + + and r2, r6, #0x0F000000 @ test for FP insns + teq r2, #0x0C000000 + teqne r2, #0x0D000000 + teqne r2, #0x0E000000 + bne ret_from_exception @ return ok if not a fp insn + + ldr r9, [sp, #60] @ get new condition codes + and r9, r9, #0xfc000003 + orr r7, r5, r9 + str r7, [sp, #60] @ update PC copy in regs + + mov r0, r6 @ save a copy + mov r1, r9 @ fetch the condition codes + bl checkCondition @ check the condition + cmp r0, #0 @ r0 = 0 ==> condition failed + + @ if condition code failed to match, next insn + beq next @ get the next instruction; + + mov r0, r6 @ prepare for EmulateAll() + adr lr, 1b + orr lr, lr, #3 + b EmulateAll @ if r0 != 0, goto EmulateAll + +.Lret: b ret_from_exception @ let the user eat segfaults + + @ We need to be prepared for the instruction at .Lx1 to fault. + @ Emit the appropriate exception gunk to fix things up. + .section __ex_table,"a" + .align 3 + .long .Lx1 + ldr lr, [lr, $(.Lret - .Lx1)/4] + .previous diff --git a/arch/arm26/nwfpe/extended_cpdo.c b/arch/arm26/nwfpe/extended_cpdo.c new file mode 100644 index 000000000000..331407596d91 --- /dev/null +++ b/arch/arm26/nwfpe/extended_cpdo.c @@ -0,0 +1,273 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "softfloat.h" +#include "fpopcode.h" + +floatx80 floatx80_exp(floatx80 Fm); +floatx80 floatx80_ln(floatx80 Fm); +floatx80 floatx80_sin(floatx80 rFm); +floatx80 floatx80_cos(floatx80 rFm); +floatx80 floatx80_arcsin(floatx80 rFm); +floatx80 floatx80_arctan(floatx80 rFm); +floatx80 floatx80_log(floatx80 rFm); +floatx80 floatx80_tan(floatx80 rFm); +floatx80 floatx80_arccos(floatx80 rFm); +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm); +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm); + +unsigned int ExtendedCPDO(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + floatx80 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + //printk("ExtendedCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getExtendedConstant(Fm); + } + else + { + switch (fpa11->fType[Fm]) + { + case typeSingle: + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle); + break; + + case typeDouble: + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble); + break; + + case typeExtended: + rFm = fpa11->fpreg[Fm].fExtended; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fType[Fn]) + { + case typeSingle: + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); + break; + + case typeDouble: + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); + break; + + case typeExtended: + rFn = fpa11->fpreg[Fn].fExtended; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fExtended = rFm; + break; + + case MNF_CODE: + rFm.high ^= 0x8000; + fpa11->fpreg[Fd].fExtended = rFm; + break; + + case ABS_CODE: + rFm.high &= 0x7fff; + fpa11->fpreg[Fd].fExtended = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fExtended = floatx80_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fType[Fd] = typeExtended; + return nRc; +} + +#if 0 +floatx80 floatx80_exp(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_ln(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_sin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_cos(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arcsin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arctan(floatx80 rFm) +{ + //series +} + +floatx80 floatx80_log(floatx80 rFm) +{ + return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7)); +} + +floatx80 floatx80_tan(floatx80 rFm) +{ + return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm)); +} + +floatx80 floatx80_arccos(floatx80 rFm) +{ + //return floatx80_sub(halfPi,floatx80_arcsin(rFm)); +} + +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm) +{ + return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); +} + +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm) +{ + return floatx80_arctan(floatx80_div(rFn,rFm)); +} +#endif diff --git a/arch/arm26/nwfpe/fpa11.c b/arch/arm26/nwfpe/fpa11.c new file mode 100644 index 000000000000..e954540a9464 --- /dev/null +++ b/arch/arm26/nwfpe/fpa11.c @@ -0,0 +1,221 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "fpopcode.h" + +#include "fpmodule.h" +#include "fpmodule.inl" + +#include <linux/compiler.h> +#include <asm/system.h> + +/* forward declarations */ +unsigned int EmulateCPDO(const unsigned int); +unsigned int EmulateCPDT(const unsigned int); +unsigned int EmulateCPRT(const unsigned int); + +/* Reset the FPA11 chip. Called to initialize and reset the emulator. */ +void resetFPA11(void) +{ + int i; + FPA11 *fpa11 = GET_FPA11(); + + /* initialize the register type array */ + for (i=0;i<=7;i++) + { + fpa11->fType[i] = typeNone; + } + + /* FPSR: set system id to FP_EMULATOR, set AC, clear all other bits */ + fpa11->fpsr = FP_EMULATOR | BIT_AC; + + /* FPCR: set SB, AB and DA bits, clear all others */ +#if MAINTAIN_FPCR + fpa11->fpcr = MASK_RESET; +#endif +} + +void SetRoundingMode(const unsigned int opcode) +{ +#if MAINTAIN_FPCR + FPA11 *fpa11 = GET_FPA11(); + fpa11->fpcr &= ~MASK_ROUNDING_MODE; +#endif + switch (opcode & MASK_ROUNDING_MODE) + { + default: + case ROUND_TO_NEAREST: + float_rounding_mode = float_round_nearest_even; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_NEAREST; +#endif + break; + + case ROUND_TO_PLUS_INFINITY: + float_rounding_mode = float_round_up; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_PLUS_INFINITY; +#endif + break; + + case ROUND_TO_MINUS_INFINITY: + float_rounding_mode = float_round_down; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_MINUS_INFINITY; +#endif + break; + + case ROUND_TO_ZERO: + float_rounding_mode = float_round_to_zero; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_ZERO; +#endif + break; + } +} + +void SetRoundingPrecision(const unsigned int opcode) +{ +#if MAINTAIN_FPCR + FPA11 *fpa11 = GET_FPA11(); + fpa11->fpcr &= ~MASK_ROUNDING_PRECISION; +#endif + switch (opcode & MASK_ROUNDING_PRECISION) + { + case ROUND_SINGLE: + floatx80_rounding_precision = 32; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_SINGLE; +#endif + break; + + case ROUND_DOUBLE: + floatx80_rounding_precision = 64; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_DOUBLE; +#endif + break; + + case ROUND_EXTENDED: + floatx80_rounding_precision = 80; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_EXTENDED; +#endif + break; + + default: floatx80_rounding_precision = 80; + } +} + +void FPA11_CheckInit(void) +{ + FPA11 *fpa11 = GET_FPA11(); + if (unlikely(fpa11->initflag == 0)) + { + resetFPA11(); + SetRoundingMode(ROUND_TO_NEAREST); + SetRoundingPrecision(ROUND_EXTENDED); + fpa11->initflag = 1; + } +} + +/* Emulate the instruction in the opcode. */ +unsigned int EmulateAll(unsigned int opcode) +{ + unsigned int nRc = 1, code; + + code = opcode & 0x00000f00; + if (code == 0x00000100 || code == 0x00000200) + { + /* For coprocessor 1 or 2 (FPA11) */ + code = opcode & 0x0e000000; + if (code == 0x0e000000) + { + if (opcode & 0x00000010) + { + /* Emulate conversion opcodes. */ + /* Emulate register transfer opcodes. */ + /* Emulate comparison opcodes. */ + nRc = EmulateCPRT(opcode); + } + else + { + /* Emulate monadic arithmetic opcodes. */ + /* Emulate dyadic arithmetic opcodes. */ + nRc = EmulateCPDO(opcode); + } + } + else if (code == 0x0c000000) + { + /* Emulate load/store opcodes. */ + /* Emulate load/store multiple opcodes. */ + nRc = EmulateCPDT(opcode); + } + else + { + /* Invalid instruction detected. Return FALSE. */ + nRc = 0; + } + } + + return(nRc); +} + +#if 0 +unsigned int EmulateAll1(unsigned int opcode) +{ + switch ((opcode >> 24) & 0xf) + { + case 0xc: + case 0xd: + if ((opcode >> 20) & 0x1) + { + switch ((opcode >> 8) & 0xf) + { + case 0x1: return PerformLDF(opcode); break; + case 0x2: return PerformLFM(opcode); break; + default: return 0; + } + } + else + { + switch ((opcode >> 8) & 0xf) + { + case 0x1: return PerformSTF(opcode); break; + case 0x2: return PerformSFM(opcode); break; + default: return 0; + } + } + break; + + case 0xe: + if (opcode & 0x10) + return EmulateCPDO(opcode); + else + return EmulateCPRT(opcode); + break; + + default: return 0; + } +} +#endif + diff --git a/arch/arm26/nwfpe/fpa11.h b/arch/arm26/nwfpe/fpa11.h new file mode 100644 index 000000000000..be09902a211d --- /dev/null +++ b/arch/arm26/nwfpe/fpa11.h @@ -0,0 +1,87 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.com, 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPA11_H__ +#define __FPA11_H__ + +#define GET_FPA11() ((FPA11 *)(¤t_thread_info()->fpstate)) + +/* + * The processes registers are always at the very top of the 8K + * stack+task struct. Use the same method as 'current' uses to + * reach them. + */ +register unsigned int *user_registers asm("sl"); + +#define GET_USERREG() (user_registers) + +#include <linux/thread_info.h> + +/* includes */ +#include "fpsr.h" /* FP control and status register definitions */ +#include "softfloat.h" + +#define typeNone 0x00 +#define typeSingle 0x01 +#define typeDouble 0x02 +#define typeExtended 0x03 + +/* + * This must be no more and no less than 12 bytes. + */ +typedef union tagFPREG { + floatx80 fExtended; + float64 fDouble; + float32 fSingle; +} FPREG; + +/* + * FPA11 device model. + * + * This structure is exported to user space. Do not re-order. + * Only add new stuff to the end, and do not change the size of + * any element. Elements of this structure are used by user + * space, and must match struct user_fp in include/asm-arm/user.h. + * We include the byte offsets below for documentation purposes. + * + * The size of this structure and FPREG are checked by fpmodule.c + * on initialisation. If the rules have been broken, NWFPE will + * not initialise. + */ +typedef struct tagFPA11 { +/* 0 */ FPREG fpreg[8]; /* 8 floating point registers */ +/* 96 */ FPSR fpsr; /* floating point status register */ +/* 100 */ FPCR fpcr; /* floating point control register */ +/* 104 */ unsigned char fType[8]; /* type of floating point value held in + floating point registers. One of none + single, double or extended. */ +/* 112 */ int initflag; /* this is special. The kernel guarantees + to set it to 0 when a thread is launched, + so we can use it to detect whether this + instance of the emulator needs to be + initialised. */ +} FPA11; + +extern void resetFPA11(void); +extern void SetRoundingMode(const unsigned int); +extern void SetRoundingPrecision(const unsigned int); + +#endif diff --git a/arch/arm26/nwfpe/fpa11.inl b/arch/arm26/nwfpe/fpa11.inl new file mode 100644 index 000000000000..1c45cba2de66 --- /dev/null +++ b/arch/arm26/nwfpe/fpa11.inl @@ -0,0 +1,51 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" + +/* Read and write floating point status register */ +extern __inline__ unsigned int readFPSR(void) +{ + FPA11 *fpa11 = GET_FPA11(); + return(fpa11->fpsr); +} + +extern __inline__ void writeFPSR(FPSR reg) +{ + FPA11 *fpa11 = GET_FPA11(); + /* the sysid byte in the status register is readonly */ + fpa11->fpsr = (fpa11->fpsr & MASK_SYSID) | (reg & ~MASK_SYSID); +} + +/* Read and write floating point control register */ +extern __inline__ FPCR readFPCR(void) +{ + FPA11 *fpa11 = GET_FPA11(); + /* clear SB, AB and DA bits before returning FPCR */ + return(fpa11->fpcr & ~MASK_RFC); +} + +extern __inline__ void writeFPCR(FPCR reg) +{ + FPA11 *fpa11 = GET_FPA11(); + fpa11->fpcr &= ~MASK_WFC; /* clear SB, AB and DA bits */ + fpa11->fpcr |= (reg & MASK_WFC); /* write SB, AB and DA bits */ +} diff --git a/arch/arm26/nwfpe/fpa11_cpdo.c b/arch/arm26/nwfpe/fpa11_cpdo.c new file mode 100644 index 000000000000..343a6b9fd520 --- /dev/null +++ b/arch/arm26/nwfpe/fpa11_cpdo.c @@ -0,0 +1,117 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "fpopcode.h" + +unsigned int SingleCPDO(const unsigned int opcode); +unsigned int DoubleCPDO(const unsigned int opcode); +unsigned int ExtendedCPDO(const unsigned int opcode); + +unsigned int EmulateCPDO(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + unsigned int Fd, nType, nDest, nRc = 1; + + //printk("EmulateCPDO(0x%08x)\n",opcode); + + /* Get the destination size. If not valid let Linux perform + an invalid instruction trap. */ + nDest = getDestinationSize(opcode); + if (typeNone == nDest) return 0; + + SetRoundingMode(opcode); + + /* Compare the size of the operands in Fn and Fm. + Choose the largest size and perform operations in that size, + in order to make use of all the precision of the operands. + If Fm is a constant, we just grab a constant of a size + matching the size of the operand in Fn. */ + if (MONADIC_INSTRUCTION(opcode)) + nType = nDest; + else + nType = fpa11->fType[getFn(opcode)]; + + if (!CONSTANT_FM(opcode)) + { + register unsigned int Fm = getFm(opcode); + if (nType < fpa11->fType[Fm]) + { + nType = fpa11->fType[Fm]; + } + } + + switch (nType) + { + case typeSingle : nRc = SingleCPDO(opcode); break; + case typeDouble : nRc = DoubleCPDO(opcode); break; + case typeExtended : nRc = ExtendedCPDO(opcode); break; + default : nRc = 0; + } + + /* If the operation succeeded, check to see if the result in the + destination register is the correct size. If not force it + to be. */ + Fd = getFd(opcode); + nType = fpa11->fType[Fd]; + if ((0 != nRc) && (nDest != nType)) + { + switch (nDest) + { + case typeSingle: + { + if (typeDouble == nType) + fpa11->fpreg[Fd].fSingle = + float64_to_float32(fpa11->fpreg[Fd].fDouble); + else + fpa11->fpreg[Fd].fSingle = + floatx80_to_float32(fpa11->fpreg[Fd].fExtended); + } + break; + + case typeDouble: + { + if (typeSingle == nType) + fpa11->fpreg[Fd].fDouble = + float32_to_float64(fpa11->fpreg[Fd].fSingle); + else + fpa11->fpreg[Fd].fDouble = + floatx80_to_float64(fpa11->fpreg[Fd].fExtended); + } + break; + + case typeExtended: + { + if (typeSingle == nType) + fpa11->fpreg[Fd].fExtended = + float32_to_floatx80(fpa11->fpreg[Fd].fSingle); + else + fpa11->fpreg[Fd].fExtended = + float64_to_floatx80(fpa11->fpreg[Fd].fDouble); + } + break; + } + + fpa11->fType[Fd] = nDest; + } + + return nRc; +} diff --git a/arch/arm26/nwfpe/fpa11_cpdt.c b/arch/arm26/nwfpe/fpa11_cpdt.c new file mode 100644 index 000000000000..e12db7c51a76 --- /dev/null +++ b/arch/arm26/nwfpe/fpa11_cpdt.c @@ -0,0 +1,368 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.com, 1998-1999 + (c) Philip Blundell, 1998 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpmodule.h" +#include "fpmodule.inl" + +#include <asm/uaccess.h> + +static inline +void loadSingle(const unsigned int Fn,const unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + fpa11->fType[Fn] = typeSingle; + get_user(fpa11->fpreg[Fn].fSingle, pMem); +} + +static inline +void loadDouble(const unsigned int Fn,const unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + unsigned int *p; + p = (unsigned int*)&fpa11->fpreg[Fn].fDouble; + fpa11->fType[Fn] = typeDouble; + get_user(p[0], &pMem[1]); + get_user(p[1], &pMem[0]); /* sign & exponent */ +} + +static inline +void loadExtended(const unsigned int Fn,const unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + unsigned int *p; + p = (unsigned int*)&fpa11->fpreg[Fn].fExtended; + fpa11->fType[Fn] = typeExtended; + get_user(p[0], &pMem[0]); /* sign & exponent */ + get_user(p[1], &pMem[2]); /* ls bits */ + get_user(p[2], &pMem[1]); /* ms bits */ +} + +static inline +void loadMultiple(const unsigned int Fn,const unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + register unsigned int *p; + unsigned long x; + + p = (unsigned int*)&(fpa11->fpreg[Fn]); + get_user(x, &pMem[0]); + fpa11->fType[Fn] = (x >> 14) & 0x00000003; + + switch (fpa11->fType[Fn]) + { + case typeSingle: + case typeDouble: + { + get_user(p[0], &pMem[2]); /* Single */ + get_user(p[1], &pMem[1]); /* double msw */ + p[2] = 0; /* empty */ + } + break; + + case typeExtended: + { + get_user(p[1], &pMem[2]); + get_user(p[2], &pMem[1]); /* msw */ + p[0] = (x & 0x80003fff); + } + break; + } +} + +static inline +void storeSingle(const unsigned int Fn,unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + union + { + float32 f; + unsigned int i[1]; + } val; + + switch (fpa11->fType[Fn]) + { + case typeDouble: + val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble); + break; + + case typeExtended: + val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended); + break; + + default: val.f = fpa11->fpreg[Fn].fSingle; + } + + put_user(val.i[0], pMem); +} + +static inline +void storeDouble(const unsigned int Fn,unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + union + { + float64 f; + unsigned int i[2]; + } val; + + switch (fpa11->fType[Fn]) + { + case typeSingle: + val.f = float32_to_float64(fpa11->fpreg[Fn].fSingle); + break; + + case typeExtended: + val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended); + break; + + default: val.f = fpa11->fpreg[Fn].fDouble; + } + + put_user(val.i[1], &pMem[0]); /* msw */ + put_user(val.i[0], &pMem[1]); /* lsw */ +} + +static inline +void storeExtended(const unsigned int Fn,unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + union + { + floatx80 f; + unsigned int i[3]; + } val; + + switch (fpa11->fType[Fn]) + { + case typeSingle: + val.f = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); + break; + + case typeDouble: + val.f = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); + break; + + default: val.f = fpa11->fpreg[Fn].fExtended; + } + + put_user(val.i[0], &pMem[0]); /* sign & exp */ + put_user(val.i[1], &pMem[2]); + put_user(val.i[2], &pMem[1]); /* msw */ +} + +static inline +void storeMultiple(const unsigned int Fn,unsigned int *pMem) +{ + FPA11 *fpa11 = GET_FPA11(); + register unsigned int nType, *p; + + p = (unsigned int*)&(fpa11->fpreg[Fn]); + nType = fpa11->fType[Fn]; + + switch (nType) + { + case typeSingle: + case typeDouble: + { + put_user(p[0], &pMem[2]); /* single */ + put_user(p[1], &pMem[1]); /* double msw */ + put_user(nType << 14, &pMem[0]); + } + break; + + case typeExtended: + { + put_user(p[2], &pMem[1]); /* msw */ + put_user(p[1], &pMem[2]); + put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]); + } + break; + } +} + +unsigned int PerformLDF(const unsigned int opcode) +{ + unsigned int *pBase, *pAddress, *pFinal, nRc = 1, + write_back = WRITE_BACK(opcode); + + //printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode)); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) + { + pBase += 2; + write_back = 0; + } + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case TRANSFER_SINGLE : loadSingle(getFd(opcode),pAddress); break; + case TRANSFER_DOUBLE : loadDouble(getFd(opcode),pAddress); break; + case TRANSFER_EXTENDED: loadExtended(getFd(opcode),pAddress); break; + default: nRc = 0; + } + + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal); + return nRc; +} + +unsigned int PerformSTF(const unsigned int opcode) +{ + unsigned int *pBase, *pAddress, *pFinal, nRc = 1, + write_back = WRITE_BACK(opcode); + + //printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode)); + SetRoundingMode(ROUND_TO_NEAREST); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) + { + pBase += 2; + write_back = 0; + } + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case TRANSFER_SINGLE : storeSingle(getFd(opcode),pAddress); break; + case TRANSFER_DOUBLE : storeDouble(getFd(opcode),pAddress); break; + case TRANSFER_EXTENDED: storeExtended(getFd(opcode),pAddress); break; + default: nRc = 0; + } + + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal); + return nRc; +} + +unsigned int PerformLFM(const unsigned int opcode) +{ + unsigned int i, Fd, *pBase, *pAddress, *pFinal, + write_back = WRITE_BACK(opcode); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) + { + pBase += 2; + write_back = 0; + } + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + Fd = getFd(opcode); + for (i=getRegisterCount(opcode);i>0;i--) + { + loadMultiple(Fd,pAddress); + pAddress += 3; Fd++; + if (Fd == 8) Fd = 0; + } + + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal); + return 1; +} + +unsigned int PerformSFM(const unsigned int opcode) +{ + unsigned int i, Fd, *pBase, *pAddress, *pFinal, + write_back = WRITE_BACK(opcode); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) + { + pBase += 2; + write_back = 0; + } + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + Fd = getFd(opcode); + for (i=getRegisterCount(opcode);i>0;i--) + { + storeMultiple(Fd,pAddress); + pAddress += 3; Fd++; + if (Fd == 8) Fd = 0; + } + + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal); + return 1; +} + +#if 1 +unsigned int EmulateCPDT(const unsigned int opcode) +{ + unsigned int nRc = 0; + + //printk("EmulateCPDT(0x%08x)\n",opcode); + + if (LDF_OP(opcode)) + { + nRc = PerformLDF(opcode); + } + else if (LFM_OP(opcode)) + { + nRc = PerformLFM(opcode); + } + else if (STF_OP(opcode)) + { + nRc = PerformSTF(opcode); + } + else if (SFM_OP(opcode)) + { + nRc = PerformSFM(opcode); + } + else + { + nRc = 0; + } + + return nRc; +} +#endif diff --git a/arch/arm26/nwfpe/fpa11_cprt.c b/arch/arm26/nwfpe/fpa11_cprt.c new file mode 100644 index 000000000000..a201076c1f14 --- /dev/null +++ b/arch/arm26/nwfpe/fpa11_cprt.c @@ -0,0 +1,289 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + (c) Philip Blundell, 1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "milieu.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.inl" +#include "fpmodule.h" +#include "fpmodule.inl" + +extern flag floatx80_is_nan(floatx80); +extern flag float64_is_nan( float64); +extern flag float32_is_nan( float32); + +void SetRoundingMode(const unsigned int opcode); + +unsigned int PerformFLT(const unsigned int opcode); +unsigned int PerformFIX(const unsigned int opcode); + +static unsigned int +PerformComparison(const unsigned int opcode); + +unsigned int EmulateCPRT(const unsigned int opcode) +{ + unsigned int nRc = 1; + + //printk("EmulateCPRT(0x%08x)\n",opcode); + + if (opcode & 0x800000) + { + /* This is some variant of a comparison (PerformComparison will + sort out which one). Since most of the other CPRT + instructions are oddball cases of some sort or other it makes + sense to pull this out into a fast path. */ + return PerformComparison(opcode); + } + + /* Hint to GCC that we'd like a jump table rather than a load of CMPs */ + switch ((opcode & 0x700000) >> 20) + { + case FLT_CODE >> 20: nRc = PerformFLT(opcode); break; + case FIX_CODE >> 20: nRc = PerformFIX(opcode); break; + + case WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break; + case RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break; + +#if 0 /* We currently have no use for the FPCR, so there's no point + in emulating it. */ + case WFC_CODE >> 20: writeFPCR(readRegister(getRd(opcode))); + case RFC_CODE >> 20: writeRegister(getRd(opcode),readFPCR()); break; +#endif + + default: nRc = 0; + } + + return nRc; +} + +unsigned int PerformFLT(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + + unsigned int nRc = 1; + SetRoundingMode(opcode); + + switch (opcode & MASK_ROUNDING_PRECISION) + { + case ROUND_SINGLE: + { + fpa11->fType[getFn(opcode)] = typeSingle; + fpa11->fpreg[getFn(opcode)].fSingle = + int32_to_float32(readRegister(getRd(opcode))); + } + break; + + case ROUND_DOUBLE: + { + fpa11->fType[getFn(opcode)] = typeDouble; + fpa11->fpreg[getFn(opcode)].fDouble = + int32_to_float64(readRegister(getRd(opcode))); + } + break; + + case ROUND_EXTENDED: + { + fpa11->fType[getFn(opcode)] = typeExtended; + fpa11->fpreg[getFn(opcode)].fExtended = + int32_to_floatx80(readRegister(getRd(opcode))); + } + break; + + default: nRc = 0; + } + + return nRc; +} + +unsigned int PerformFIX(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + unsigned int nRc = 1; + unsigned int Fn = getFm(opcode); + + SetRoundingMode(opcode); + + switch (fpa11->fType[Fn]) + { + case typeSingle: + { + writeRegister(getRd(opcode), + float32_to_int32(fpa11->fpreg[Fn].fSingle)); + } + break; + + case typeDouble: + { + writeRegister(getRd(opcode), + float64_to_int32(fpa11->fpreg[Fn].fDouble)); + } + break; + + case typeExtended: + { + writeRegister(getRd(opcode), + floatx80_to_int32(fpa11->fpreg[Fn].fExtended)); + } + break; + + default: nRc = 0; + } + + return nRc; +} + + +static unsigned int __inline__ +PerformComparisonOperation(floatx80 Fn, floatx80 Fm) +{ + unsigned int flags = 0; + + /* test for less than condition */ + if (floatx80_lt(Fn,Fm)) + { + flags |= CC_NEGATIVE; + } + + /* test for equal condition */ + if (floatx80_eq(Fn,Fm)) + { + flags |= CC_ZERO; + } + + /* test for greater than or equal condition */ + if (floatx80_lt(Fm,Fn)) + { + flags |= CC_CARRY; + } + + writeConditionCodes(flags); + return 1; +} + +/* This instruction sets the flags N, Z, C, V in the FPSR. */ + +static unsigned int PerformComparison(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + unsigned int Fn, Fm; + floatx80 rFn, rFm; + int e_flag = opcode & 0x400000; /* 1 if CxFE */ + int n_flag = opcode & 0x200000; /* 1 if CNxx */ + unsigned int flags = 0; + + //printk("PerformComparison(0x%08x)\n",opcode); + + Fn = getFn(opcode); + Fm = getFm(opcode); + + /* Check for unordered condition and convert all operands to 80-bit + format. + ?? Might be some mileage in avoiding this conversion if possible. + Eg, if both operands are 32-bit, detect this and do a 32-bit + comparison (cheaper than an 80-bit one). */ + switch (fpa11->fType[Fn]) + { + case typeSingle: + //printk("single.\n"); + if (float32_is_nan(fpa11->fpreg[Fn].fSingle)) + goto unordered; + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); + break; + + case typeDouble: + //printk("double.\n"); + if (float64_is_nan(fpa11->fpreg[Fn].fDouble)) + goto unordered; + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); + break; + + case typeExtended: + //printk("extended.\n"); + if (floatx80_is_nan(fpa11->fpreg[Fn].fExtended)) + goto unordered; + rFn = fpa11->fpreg[Fn].fExtended; + break; + + default: return 0; + } + + if (CONSTANT_FM(opcode)) + { + //printk("Fm is a constant: #%d.\n",Fm); + rFm = getExtendedConstant(Fm); + if (floatx80_is_nan(rFm)) + goto unordered; + } + else + { + //printk("Fm = r%d which contains a ",Fm); + switch (fpa11->fType[Fm]) + { + case typeSingle: + //printk("single.\n"); + if (float32_is_nan(fpa11->fpreg[Fm].fSingle)) + goto unordered; + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle); + break; + + case typeDouble: + //printk("double.\n"); + if (float64_is_nan(fpa11->fpreg[Fm].fDouble)) + goto unordered; + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble); + break; + + case typeExtended: + //printk("extended.\n"); + if (floatx80_is_nan(fpa11->fpreg[Fm].fExtended)) + goto unordered; + rFm = fpa11->fpreg[Fm].fExtended; + break; + + default: return 0; + } + } + + if (n_flag) + { + rFm.high ^= 0x8000; + } + + return PerformComparisonOperation(rFn,rFm); + + unordered: + /* ?? The FPA data sheet is pretty vague about this, in particular + about whether the non-E comparisons can ever raise exceptions. + This implementation is based on a combination of what it says in + the data sheet, observation of how the Acorn emulator actually + behaves (and how programs expect it to) and guesswork. */ + flags |= CC_OVERFLOW; + flags &= ~(CC_ZERO | CC_NEGATIVE); + + if (BIT_AC & readFPSR()) flags |= CC_CARRY; + + if (e_flag) float_raise(float_flag_invalid); + + writeConditionCodes(flags); + return 1; +} diff --git a/arch/arm26/nwfpe/fpmodule.c b/arch/arm26/nwfpe/fpmodule.c new file mode 100644 index 000000000000..528fa710aa34 --- /dev/null +++ b/arch/arm26/nwfpe/fpmodule.c @@ -0,0 +1,182 @@ + +/* + NetWinder Floating Point Emulator + (c) Rebel.com, 1998-1999 + (c) Philip Blundell, 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" + +#include <linux/module.h> +#include <linux/version.h> +#include <linux/config.h> + +/* XXX */ +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/init.h> +/* XXX */ + +#include "softfloat.h" +#include "fpopcode.h" +#include "fpmodule.h" +#include "fpa11.inl" + +/* kernel symbols required for signal handling */ +typedef struct task_struct* PTASK; + +#ifdef MODULE +void fp_send_sig(unsigned long sig, PTASK p, int priv); +#if LINUX_VERSION_CODE > 0x20115 +MODULE_AUTHOR("Scott Bambrough <scottb@rebel.com>"); +MODULE_DESCRIPTION("NWFPE floating point emulator"); +#endif + +#else +#define fp_send_sig send_sig +#define kern_fp_enter fp_enter + +extern char fpe_type[]; +#endif + +/* kernel function prototypes required */ +void fp_setup(void); + +/* external declarations for saved kernel symbols */ +extern void (*kern_fp_enter)(void); + +/* Original value of fp_enter from kernel before patched by fpe_init. */ +static void (*orig_fp_enter)(void); + +/* forward declarations */ +extern void nwfpe_enter(void); + +#ifdef MODULE +/* + * Return 0 if we can be unloaded. This can only happen if + * kern_fp_enter is still pointing at nwfpe_enter + */ +static int fpe_unload(void) +{ + return (kern_fp_enter == nwfpe_enter) ? 0 : 1; +} +#endif + +static int __init fpe_init(void) +{ + if (sizeof(FPA11) > sizeof(union fp_state)) { + printk(KERN_ERR "nwfpe: bad structure size\n"); + return -EINVAL; + } + + if (sizeof(FPREG) != 12) { + printk(KERN_ERR "nwfpe: bad register size\n"); + return -EINVAL; + } + +#ifdef MODULE + if (!mod_member_present(&__this_module, can_unload)) + return -EINVAL; + __this_module.can_unload = fpe_unload; +#else + if (fpe_type[0] && strcmp(fpe_type, "nwfpe")) + return 0; +#endif + + /* Display title, version and copyright information. */ + printk(KERN_WARNING "NetWinder Floating Point Emulator V0.95 " + "(c) 1998-1999 Rebel.com\n"); + + /* Save pointer to the old FP handler and then patch ourselves in */ + orig_fp_enter = kern_fp_enter; + kern_fp_enter = nwfpe_enter; + + return 0; +} + +static void __exit fpe_exit(void) +{ + /* Restore the values we saved earlier. */ + kern_fp_enter = orig_fp_enter; +} + +/* +ScottB: November 4, 1998 + +Moved this function out of softfloat-specialize into fpmodule.c. +This effectively isolates all the changes required for integrating with the +Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying +fpmodule.c to integrate with the NetBSD kernel (I hope!). + +[1/1/99: Not quite true any more unfortunately. There is Linux-specific +code to access data in user space in some other source files at the +moment (grep for get_user / put_user calls). --philb] + +float_exception_flags is a global variable in SoftFloat. + +This function is called by the SoftFloat routines to raise a floating +point exception. We check the trap enable byte in the FPSR, and raise +a SIGFPE exception if necessary. If not the relevant bits in the +cumulative exceptions flag byte are set and we return. +*/ + +void float_raise(signed char flags) +{ + register unsigned int fpsr, cumulativeTraps; + +#ifdef CONFIG_DEBUG_USER + printk(KERN_DEBUG "NWFPE: %s[%d] takes exception %08x at %p from %08x\n", + current->comm, current->pid, flags, + __builtin_return_address(0), GET_USERREG()[15]); +#endif + + /* Keep SoftFloat exception flags up to date. */ + float_exception_flags |= flags; + + /* Read fpsr and initialize the cumulativeTraps. */ + fpsr = readFPSR(); + cumulativeTraps = 0; + + /* For each type of exception, the cumulative trap exception bit is only + set if the corresponding trap enable bit is not set. */ + if ((!(fpsr & BIT_IXE)) && (flags & BIT_IXC)) + cumulativeTraps |= BIT_IXC; + if ((!(fpsr & BIT_UFE)) && (flags & BIT_UFC)) + cumulativeTraps |= BIT_UFC; + if ((!(fpsr & BIT_OFE)) && (flags & BIT_OFC)) + cumulativeTraps |= BIT_OFC; + if ((!(fpsr & BIT_DZE)) && (flags & BIT_DZC)) + cumulativeTraps |= BIT_DZC; + if ((!(fpsr & BIT_IOE)) && (flags & BIT_IOC)) + cumulativeTraps |= BIT_IOC; + + /* Set the cumulative exceptions flags. */ + if (cumulativeTraps) + writeFPSR(fpsr | cumulativeTraps); + + /* Raise an exception if necessary. */ + if (fpsr & (flags << 16)) + fp_send_sig(SIGFPE, current, 1); +} + +module_init(fpe_init); +module_exit(fpe_exit); diff --git a/arch/arm26/nwfpe/fpmodule.h b/arch/arm26/nwfpe/fpmodule.h new file mode 100644 index 000000000000..ef71aab46a32 --- /dev/null +++ b/arch/arm26/nwfpe/fpmodule.h @@ -0,0 +1,47 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.com, 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPMODULE_H__ +#define __FPMODULE_H__ + +#include <linux/config.h> + +#define REG_ORIG_R0 16 +#define REG_CPSR 15 +#define REG_PC 15 +#define REG_LR 14 +#define REG_SP 13 +#define REG_IP 12 +#define REG_FP 11 +#define REG_R10 10 +#define REG_R9 9 +#define REG_R9 9 +#define REG_R8 8 +#define REG_R7 7 +#define REG_R6 6 +#define REG_R5 5 +#define REG_R4 4 +#define REG_R3 3 +#define REG_R2 2 +#define REG_R1 1 +#define REG_R0 0 + +#endif diff --git a/arch/arm26/nwfpe/fpmodule.inl b/arch/arm26/nwfpe/fpmodule.inl new file mode 100644 index 000000000000..ef228378ffaf --- /dev/null +++ b/arch/arm26/nwfpe/fpmodule.inl @@ -0,0 +1,84 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +extern __inline__ +unsigned int readRegister(const unsigned int nReg) +{ + /* Note: The CPU thinks it has dealt with the current instruction. As + a result the program counter has been advanced to the next + instruction, and points 4 bytes beyond the actual instruction + that caused the invalid instruction trap to occur. We adjust + for this in this routine. LDF/STF instructions with Rn = PC + depend on the PC being correct, as they use PC+8 in their + address calculations. */ + unsigned int *userRegisters = GET_USERREG(); + unsigned int val = userRegisters[nReg]; + if (REG_PC == nReg) val -= 4; + return val; +} + +extern __inline__ +void writeRegister(const unsigned int nReg, const unsigned int val) +{ + unsigned int *userRegisters = GET_USERREG(); + userRegisters[nReg] = val; +} + +extern __inline__ +unsigned int readCPSR(void) +{ + return(readRegister(REG_CPSR)); +} + +extern __inline__ +void writeCPSR(const unsigned int val) +{ + writeRegister(REG_CPSR,val); +} + +extern __inline__ +unsigned int readConditionCodes(void) +{ +#ifdef __FPEM_TEST__ + return(0); +#else + return(readCPSR() & CC_MASK); +#endif +} + +extern __inline__ +void writeConditionCodes(const unsigned int val) +{ + unsigned int *userRegisters = GET_USERREG(); + unsigned int rval; + /* + * Operate directly on userRegisters since + * the CPSR may be the PC register itself. + */ + rval = userRegisters[REG_CPSR] & ~CC_MASK; + userRegisters[REG_CPSR] = rval | (val & CC_MASK); +} + +extern __inline__ +unsigned int readMemoryInt(unsigned int *pMem) +{ + return *pMem; +} diff --git a/arch/arm26/nwfpe/fpopcode.c b/arch/arm26/nwfpe/fpopcode.c new file mode 100644 index 000000000000..d81ddd188322 --- /dev/null +++ b/arch/arm26/nwfpe/fpopcode.c @@ -0,0 +1,148 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpsr.h" +#include "fpmodule.h" +#include "fpmodule.inl" + +const floatx80 floatx80Constant[] = { + { 0x0000, 0x0000000000000000ULL}, /* extended 0.0 */ + { 0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */ + { 0x4000, 0x8000000000000000ULL}, /* extended 2.0 */ + { 0x4000, 0xc000000000000000ULL}, /* extended 3.0 */ + { 0x4001, 0x8000000000000000ULL}, /* extended 4.0 */ + { 0x4001, 0xa000000000000000ULL}, /* extended 5.0 */ + { 0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */ + { 0x4002, 0xa000000000000000ULL} /* extended 10.0 */ +}; + +const float64 float64Constant[] = { + 0x0000000000000000ULL, /* double 0.0 */ + 0x3ff0000000000000ULL, /* double 1.0 */ + 0x4000000000000000ULL, /* double 2.0 */ + 0x4008000000000000ULL, /* double 3.0 */ + 0x4010000000000000ULL, /* double 4.0 */ + 0x4014000000000000ULL, /* double 5.0 */ + 0x3fe0000000000000ULL, /* double 0.5 */ + 0x4024000000000000ULL /* double 10.0 */ +}; + +const float32 float32Constant[] = { + 0x00000000, /* single 0.0 */ + 0x3f800000, /* single 1.0 */ + 0x40000000, /* single 2.0 */ + 0x40400000, /* single 3.0 */ + 0x40800000, /* single 4.0 */ + 0x40a00000, /* single 5.0 */ + 0x3f000000, /* single 0.5 */ + 0x41200000 /* single 10.0 */ +}; + +unsigned int getTransferLength(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case 0x00000000: nRc = 1; break; /* single precision */ + case 0x00008000: nRc = 2; break; /* double precision */ + case 0x00400000: nRc = 3; break; /* extended precision */ + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getRegisterCount(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_REGISTER_COUNT) + { + case 0x00000000: nRc = 4; break; + case 0x00008000: nRc = 1; break; + case 0x00400000: nRc = 2; break; + case 0x00408000: nRc = 3; break; + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getRoundingPrecision(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_ROUNDING_PRECISION) + { + case 0x00000000: nRc = 1; break; + case 0x00000080: nRc = 2; break; + case 0x00080000: nRc = 3; break; + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getDestinationSize(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_DESTINATION_SIZE) + { + case 0x00000000: nRc = typeSingle; break; + case 0x00000080: nRc = typeDouble; break; + case 0x00080000: nRc = typeExtended; break; + default: nRc = typeNone; + } + + return(nRc); +} + +/* condition code lookup table + index into the table is test code: EQ, NE, ... LT, GT, AL, NV + bit position in short is condition code: NZCV */ +static const unsigned short aCC[16] = { + 0xF0F0, // EQ == Z set + 0x0F0F, // NE + 0xCCCC, // CS == C set + 0x3333, // CC + 0xFF00, // MI == N set + 0x00FF, // PL + 0xAAAA, // VS == V set + 0x5555, // VC + 0x0C0C, // HI == C set && Z clear + 0xF3F3, // LS == C clear || Z set + 0xAA55, // GE == (N==V) + 0x55AA, // LT == (N!=V) + 0x0A05, // GT == (!Z && (N==V)) + 0xF5FA, // LE == (Z || (N!=V)) + 0xFFFF, // AL always + 0 // NV +}; + +unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes) +{ + return (aCC[opcode>>28] >> (ccodes>>28)) & 1; +} diff --git a/arch/arm26/nwfpe/fpopcode.h b/arch/arm26/nwfpe/fpopcode.h new file mode 100644 index 000000000000..13c7419262ab --- /dev/null +++ b/arch/arm26/nwfpe/fpopcode.h @@ -0,0 +1,390 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPOPCODE_H__ +#define __FPOPCODE_H__ + +/* +ARM Floating Point Instruction Classes +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 0 P|U|u|W|L| Rn |v| Fd |0|0|0|1| o f f s e t | CPDT +|c o n d|1 1 0 P|U|w|W|L| Rn |x| Fd |0|0|0|1| o f f s e t | CPDT +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 1 0|a|b|c|d|e| Fn |j| Fd |0|0|0|1|f|g|h|0|i| Fm | CPDO +|c o n d|1 1 1 0|a|b|c|L|e| Fn | Rd |0|0|0|1|f|g|h|1|i| Fm | CPRT +|c o n d|1 1 1 0|a|b|c|1|e| Fn |1|1|1|1|0|0|0|1|f|g|h|1|i| Fm | comparisons +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + +CPDT data transfer instructions + LDF, STF, LFM, SFM + +CPDO dyadic arithmetic instructions + ADF, MUF, SUF, RSF, DVF, RDF, + POW, RPW, RMF, FML, FDV, FRD, POL + +CPDO monadic arithmetic instructions + MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP, + SIN, COS, TAN, ASN, ACS, ATN, URD, NRM + +CPRT joint arithmetic/data transfer instructions + FIX (arithmetic followed by load/store) + FLT (load/store followed by arithmetic) + CMF, CNF CMFE, CNFE (comparisons) + WFS, RFS (write/read floating point status register) + WFC, RFC (write/read floating point control register) + +cond condition codes +P pre/post index bit: 0 = postindex, 1 = preindex +U up/down bit: 0 = stack grows down, 1 = stack grows up +W write back bit: 1 = update base register (Rn) +L load/store bit: 0 = store, 1 = load +Rn base register +Rd destination/source register +Fd floating point destination register +Fn floating point source register +Fm floating point source register or floating point constant + +uv transfer length (TABLE 1) +wx register count (TABLE 2) +abcd arithmetic opcode (TABLES 3 & 4) +ef destination size (rounding precision) (TABLE 5) +gh rounding mode (TABLE 6) +j dyadic/monadic bit: 0 = dyadic, 1 = monadic +i constant bit: 1 = constant (TABLE 6) +*/ + +/* +TABLE 1 ++-------------------------+---+---+---------+---------+ +| Precision | u | v | FPSR.EP | length | ++-------------------------+---+---+---------+---------+ +| Single | 0 ü 0 | x | 1 words | +| Double | 1 ü 1 | x | 2 words | +| Extended | 1 ü 1 | x | 3 words | +| Packed decimal | 1 ü 1 | 0 | 3 words | +| Expanded packed decimal | 1 ü 1 | 1 | 4 words | ++-------------------------+---+---+---------+---------+ +Note: x = don't care +*/ + +/* +TABLE 2 ++---+---+---------------------------------+ +| w | x | Number of registers to transfer | ++---+---+---------------------------------+ +| 0 ü 1 | 1 | +| 1 ü 0 | 2 | +| 1 ü 1 | 3 | +| 0 ü 0 | 4 | ++---+---+---------------------------------+ +*/ + +/* +TABLE 3: Dyadic Floating Point Opcodes ++---+---+---+---+----------+-----------------------+-----------------------+ +| a | b | c | d | Mnemonic | Description | Operation | ++---+---+---+---+----------+-----------------------+-----------------------+ +| 0 | 0 | 0 | 0 | ADF | Add | Fd := Fn + Fm | +| 0 | 0 | 0 | 1 | MUF | Multiply | Fd := Fn * Fm | +| 0 | 0 | 1 | 0 | SUF | Subtract | Fd := Fn - Fm | +| 0 | 0 | 1 | 1 | RSF | Reverse subtract | Fd := Fm - Fn | +| 0 | 1 | 0 | 0 | DVF | Divide | Fd := Fn / Fm | +| 0 | 1 | 0 | 1 | RDF | Reverse divide | Fd := Fm / Fn | +| 0 | 1 | 1 | 0 | POW | Power | Fd := Fn ^ Fm | +| 0 | 1 | 1 | 1 | RPW | Reverse power | Fd := Fm ^ Fn | +| 1 | 0 | 0 | 0 | RMF | Remainder | Fd := IEEE rem(Fn/Fm) | +| 1 | 0 | 0 | 1 | FML | Fast Multiply | Fd := Fn * Fm | +| 1 | 0 | 1 | 0 | FDV | Fast Divide | Fd := Fn / Fm | +| 1 | 0 | 1 | 1 | FRD | Fast reverse divide | Fd := Fm / Fn | +| 1 | 1 | 0 | 0 | POL | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm) | +| 1 | 1 | 0 | 1 | | undefined instruction | trap | +| 1 | 1 | 1 | 0 | | undefined instruction | trap | +| 1 | 1 | 1 | 1 | | undefined instruction | trap | ++---+---+---+---+----------+-----------------------+-----------------------+ +Note: POW, RPW, POL are deprecated, and are available for backwards + compatibility only. +*/ + +/* +TABLE 4: Monadic Floating Point Opcodes ++---+---+---+---+----------+-----------------------+-----------------------+ +| a | b | c | d | Mnemonic | Description | Operation | ++---+---+---+---+----------+-----------------------+-----------------------+ +| 0 | 0 | 0 | 0 | MVF | Move | Fd := Fm | +| 0 | 0 | 0 | 1 | MNF | Move negated | Fd := - Fm | +| 0 | 0 | 1 | 0 | ABS | Absolute value | Fd := abs(Fm) | +| 0 | 0 | 1 | 1 | RND | Round to integer | Fd := int(Fm) | +| 0 | 1 | 0 | 0 | SQT | Square root | Fd := sqrt(Fm) | +| 0 | 1 | 0 | 1 | LOG | Log base 10 | Fd := log10(Fm) | +| 0 | 1 | 1 | 0 | LGN | Log base e | Fd := ln(Fm) | +| 0 | 1 | 1 | 1 | EXP | Exponent | Fd := e ^ Fm | +| 1 | 0 | 0 | 0 | SIN | Sine | Fd := sin(Fm) | +| 1 | 0 | 0 | 1 | COS | Cosine | Fd := cos(Fm) | +| 1 | 0 | 1 | 0 | TAN | Tangent | Fd := tan(Fm) | +| 1 | 0 | 1 | 1 | ASN | Arc Sine | Fd := arcsin(Fm) | +| 1 | 1 | 0 | 0 | ACS | Arc Cosine | Fd := arccos(Fm) | +| 1 | 1 | 0 | 1 | ATN | Arc Tangent | Fd := arctan(Fm) | +| 1 | 1 | 1 | 0 | URD | Unnormalized round | Fd := int(Fm) | +| 1 | 1 | 1 | 1 | NRM | Normalize | Fd := norm(Fm) | ++---+---+---+---+----------+-----------------------+-----------------------+ +Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are + available for backwards compatibility only. +*/ + +/* +TABLE 5 ++-------------------------+---+---+ +| Rounding Precision | e | f | ++-------------------------+---+---+ +| IEEE Single precision | 0 ü 0 | +| IEEE Double precision | 0 ü 1 | +| IEEE Extended precision | 1 ü 0 | +| undefined (trap) | 1 ü 1 | ++-------------------------+---+---+ +*/ + +/* +TABLE 5 ++---------------------------------+---+---+ +| Rounding Mode | g | h | ++---------------------------------+---+---+ +| Round to nearest (default) | 0 ü 0 | +| Round toward plus infinity | 0 ü 1 | +| Round toward negative infinity | 1 ü 0 | +| Round toward zero | 1 ü 1 | ++---------------------------------+---+---+ +*/ + +/* +=== +=== Definitions for load and store instructions +=== +*/ + +/* bit masks */ +#define BIT_PREINDEX 0x01000000 +#define BIT_UP 0x00800000 +#define BIT_WRITE_BACK 0x00200000 +#define BIT_LOAD 0x00100000 + +/* masks for load/store */ +#define MASK_CPDT 0x0c000000 /* data processing opcode */ +#define MASK_OFFSET 0x000000ff +#define MASK_TRANSFER_LENGTH 0x00408000 +#define MASK_REGISTER_COUNT MASK_TRANSFER_LENGTH +#define MASK_COPROCESSOR 0x00000f00 + +/* Tests for transfer length */ +#define TRANSFER_SINGLE 0x00000000 +#define TRANSFER_DOUBLE 0x00008000 +#define TRANSFER_EXTENDED 0x00400000 +#define TRANSFER_PACKED MASK_TRANSFER_LENGTH + +/* Get the coprocessor number from the opcode. */ +#define getCoprocessorNumber(opcode) ((opcode & MASK_COPROCESSOR) >> 8) + +/* Get the offset from the opcode. */ +#define getOffset(opcode) (opcode & MASK_OFFSET) + +/* Tests for specific data transfer load/store opcodes. */ +#define TEST_OPCODE(opcode,mask) (((opcode) & (mask)) == (mask)) + +#define LOAD_OP(opcode) TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD) +#define STORE_OP(opcode) ((opcode & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT) + +#define LDF_OP(opcode) (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1)) +#define LFM_OP(opcode) (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2)) +#define STF_OP(opcode) (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1)) +#define SFM_OP(opcode) (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2)) + +#define PREINDEXED(opcode) ((opcode & BIT_PREINDEX) != 0) +#define POSTINDEXED(opcode) ((opcode & BIT_PREINDEX) == 0) +#define BIT_UP_SET(opcode) ((opcode & BIT_UP) != 0) +#define BIT_UP_CLEAR(opcode) ((opcode & BIT_DOWN) == 0) +#define WRITE_BACK(opcode) ((opcode & BIT_WRITE_BACK) != 0) +#define LOAD(opcode) ((opcode & BIT_LOAD) != 0) +#define STORE(opcode) ((opcode & BIT_LOAD) == 0) + +/* +=== +=== Definitions for arithmetic instructions +=== +*/ +/* bit masks */ +#define BIT_MONADIC 0x00008000 +#define BIT_CONSTANT 0x00000008 + +#define CONSTANT_FM(opcode) ((opcode & BIT_CONSTANT) != 0) +#define MONADIC_INSTRUCTION(opcode) ((opcode & BIT_MONADIC) != 0) + +/* instruction identification masks */ +#define MASK_CPDO 0x0e000000 /* arithmetic opcode */ +#define MASK_ARITHMETIC_OPCODE 0x00f08000 +#define MASK_DESTINATION_SIZE 0x00080080 + +/* dyadic arithmetic opcodes. */ +#define ADF_CODE 0x00000000 +#define MUF_CODE 0x00100000 +#define SUF_CODE 0x00200000 +#define RSF_CODE 0x00300000 +#define DVF_CODE 0x00400000 +#define RDF_CODE 0x00500000 +#define POW_CODE 0x00600000 +#define RPW_CODE 0x00700000 +#define RMF_CODE 0x00800000 +#define FML_CODE 0x00900000 +#define FDV_CODE 0x00a00000 +#define FRD_CODE 0x00b00000 +#define POL_CODE 0x00c00000 +/* 0x00d00000 is an invalid dyadic arithmetic opcode */ +/* 0x00e00000 is an invalid dyadic arithmetic opcode */ +/* 0x00f00000 is an invalid dyadic arithmetic opcode */ + +/* monadic arithmetic opcodes. */ +#define MVF_CODE 0x00008000 +#define MNF_CODE 0x00108000 +#define ABS_CODE 0x00208000 +#define RND_CODE 0x00308000 +#define SQT_CODE 0x00408000 +#define LOG_CODE 0x00508000 +#define LGN_CODE 0x00608000 +#define EXP_CODE 0x00708000 +#define SIN_CODE 0x00808000 +#define COS_CODE 0x00908000 +#define TAN_CODE 0x00a08000 +#define ASN_CODE 0x00b08000 +#define ACS_CODE 0x00c08000 +#define ATN_CODE 0x00d08000 +#define URD_CODE 0x00e08000 +#define NRM_CODE 0x00f08000 + +/* +=== +=== Definitions for register transfer and comparison instructions +=== +*/ + +#define MASK_CPRT 0x0e000010 /* register transfer opcode */ +#define MASK_CPRT_CODE 0x00f00000 +#define FLT_CODE 0x00000000 +#define FIX_CODE 0x00100000 +#define WFS_CODE 0x00200000 +#define RFS_CODE 0x00300000 +#define WFC_CODE 0x00400000 +#define RFC_CODE 0x00500000 +#define CMF_CODE 0x00900000 +#define CNF_CODE 0x00b00000 +#define CMFE_CODE 0x00d00000 +#define CNFE_CODE 0x00f00000 + +/* +=== +=== Common definitions +=== +*/ + +/* register masks */ +#define MASK_Rd 0x0000f000 +#define MASK_Rn 0x000f0000 +#define MASK_Fd 0x00007000 +#define MASK_Fm 0x00000007 +#define MASK_Fn 0x00070000 + +/* condition code masks */ +#define CC_MASK 0xf0000000 +#define CC_NEGATIVE 0x80000000 +#define CC_ZERO 0x40000000 +#define CC_CARRY 0x20000000 +#define CC_OVERFLOW 0x10000000 +#define CC_EQ 0x00000000 +#define CC_NE 0x10000000 +#define CC_CS 0x20000000 +#define CC_HS CC_CS +#define CC_CC 0x30000000 +#define CC_LO CC_CC +#define CC_MI 0x40000000 +#define CC_PL 0x50000000 +#define CC_VS 0x60000000 +#define CC_VC 0x70000000 +#define CC_HI 0x80000000 +#define CC_LS 0x90000000 +#define CC_GE 0xa0000000 +#define CC_LT 0xb0000000 +#define CC_GT 0xc0000000 +#define CC_LE 0xd0000000 +#define CC_AL 0xe0000000 +#define CC_NV 0xf0000000 + +/* rounding masks/values */ +#define MASK_ROUNDING_MODE 0x00000060 +#define ROUND_TO_NEAREST 0x00000000 +#define ROUND_TO_PLUS_INFINITY 0x00000020 +#define ROUND_TO_MINUS_INFINITY 0x00000040 +#define ROUND_TO_ZERO 0x00000060 + +#define MASK_ROUNDING_PRECISION 0x00080080 +#define ROUND_SINGLE 0x00000000 +#define ROUND_DOUBLE 0x00000080 +#define ROUND_EXTENDED 0x00080000 + +/* Get the condition code from the opcode. */ +#define getCondition(opcode) (opcode >> 28) + +/* Get the source register from the opcode. */ +#define getRn(opcode) ((opcode & MASK_Rn) >> 16) + +/* Get the destination floating point register from the opcode. */ +#define getFd(opcode) ((opcode & MASK_Fd) >> 12) + +/* Get the first source floating point register from the opcode. */ +#define getFn(opcode) ((opcode & MASK_Fn) >> 16) + +/* Get the second source floating point register from the opcode. */ +#define getFm(opcode) (opcode & MASK_Fm) + +/* Get the destination register from the opcode. */ +#define getRd(opcode) ((opcode & MASK_Rd) >> 12) + +/* Get the rounding mode from the opcode. */ +#define getRoundingMode(opcode) ((opcode & MASK_ROUNDING_MODE) >> 5) + +static inline const floatx80 getExtendedConstant(const unsigned int nIndex) +{ + extern const floatx80 floatx80Constant[]; + return floatx80Constant[nIndex]; +} + +static inline const float64 getDoubleConstant(const unsigned int nIndex) +{ + extern const float64 float64Constant[]; + return float64Constant[nIndex]; +} + +static inline const float32 getSingleConstant(const unsigned int nIndex) +{ + extern const float32 float32Constant[]; + return float32Constant[nIndex]; +} + +extern unsigned int getRegisterCount(const unsigned int opcode); +extern unsigned int getDestinationSize(const unsigned int opcode); + +#endif diff --git a/arch/arm26/nwfpe/fpsr.h b/arch/arm26/nwfpe/fpsr.h new file mode 100644 index 000000000000..6dafb0f5243c --- /dev/null +++ b/arch/arm26/nwfpe/fpsr.h @@ -0,0 +1,108 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.com, 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPSR_H__ +#define __FPSR_H__ + +/* +The FPSR is a 32 bit register consisting of 4 parts, each exactly +one byte. + + SYSTEM ID + EXCEPTION TRAP ENABLE BYTE + SYSTEM CONTROL BYTE + CUMULATIVE EXCEPTION FLAGS BYTE + +The FPCR is a 32 bit register consisting of bit flags. +*/ + +/* SYSTEM ID +------------ +Note: the system id byte is read only */ + +typedef unsigned int FPSR; /* type for floating point status register */ +typedef unsigned int FPCR; /* type for floating point control register */ + +#define MASK_SYSID 0xff000000 +#define BIT_HARDWARE 0x80000000 +#define FP_EMULATOR 0x01000000 /* System ID for emulator */ +#define FP_ACCELERATOR 0x81000000 /* System ID for FPA11 */ + +/* EXCEPTION TRAP ENABLE BYTE +----------------------------- */ + +#define MASK_TRAP_ENABLE 0x00ff0000 +#define MASK_TRAP_ENABLE_STRICT 0x001f0000 +#define BIT_IXE 0x00100000 /* inexact exception enable */ +#define BIT_UFE 0x00080000 /* underflow exception enable */ +#define BIT_OFE 0x00040000 /* overflow exception enable */ +#define BIT_DZE 0x00020000 /* divide by zero exception enable */ +#define BIT_IOE 0x00010000 /* invalid operation exception enable */ + +/* SYSTEM CONTROL BYTE +---------------------- */ + +#define MASK_SYSTEM_CONTROL 0x0000ff00 +#define MASK_TRAP_STRICT 0x00001f00 + +#define BIT_AC 0x00001000 /* use alternative C-flag definition + for compares */ +#define BIT_EP 0x00000800 /* use expanded packed decimal format */ +#define BIT_SO 0x00000400 /* select synchronous operation of FPA */ +#define BIT_NE 0x00000200 /* NaN exception bit */ +#define BIT_ND 0x00000100 /* no denormalized numbers bit */ + +/* CUMULATIVE EXCEPTION FLAGS BYTE +---------------------------------- */ + +#define MASK_EXCEPTION_FLAGS 0x000000ff +#define MASK_EXCEPTION_FLAGS_STRICT 0x0000001f + +#define BIT_IXC 0x00000010 /* inexact exception flag */ +#define BIT_UFC 0x00000008 /* underflow exception flag */ +#define BIT_OFC 0x00000004 /* overfloat exception flag */ +#define BIT_DZC 0x00000002 /* divide by zero exception flag */ +#define BIT_IOC 0x00000001 /* invalid operation exception flag */ + +/* Floating Point Control Register +----------------------------------*/ + +#define BIT_RU 0x80000000 /* rounded up bit */ +#define BIT_IE 0x10000000 /* inexact bit */ +#define BIT_MO 0x08000000 /* mantissa overflow bit */ +#define BIT_EO 0x04000000 /* exponent overflow bit */ +#define BIT_SB 0x00000800 /* store bounce */ +#define BIT_AB 0x00000400 /* arithmetic bounce */ +#define BIT_RE 0x00000200 /* rounding exception */ +#define BIT_DA 0x00000100 /* disable FPA */ + +#define MASK_OP 0x00f08010 /* AU operation code */ +#define MASK_PR 0x00080080 /* AU precision */ +#define MASK_S1 0x00070000 /* AU source register 1 */ +#define MASK_S2 0x00000007 /* AU source register 2 */ +#define MASK_DS 0x00007000 /* AU destination register */ +#define MASK_RM 0x00000060 /* AU rounding mode */ +#define MASK_ALU 0x9cfff2ff /* only ALU can write these bits */ +#define MASK_RESET 0x00000d00 /* bits set on reset, all others cleared */ +#define MASK_WFC MASK_RESET +#define MASK_RFC ~MASK_RESET + +#endif diff --git a/arch/arm26/nwfpe/milieu.h b/arch/arm26/nwfpe/milieu.h new file mode 100644 index 000000000000..a3892ab2dca4 --- /dev/null +++ b/arch/arm26/nwfpe/milieu.h @@ -0,0 +1,48 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "ARM-gcc.h" + +/* +------------------------------------------------------------------------------- +Symbolic Boolean literals. +------------------------------------------------------------------------------- +*/ +enum { + FALSE = 0, + TRUE = 1 +}; + diff --git a/arch/arm26/nwfpe/single_cpdo.c b/arch/arm26/nwfpe/single_cpdo.c new file mode 100644 index 000000000000..5cdcddbb8999 --- /dev/null +++ b/arch/arm26/nwfpe/single_cpdo.c @@ -0,0 +1,255 @@ +/* + NetWinder Floating Point Emulator + (c) Rebel.COM, 1998,1999 + + Direct questions, comments to Scott Bambrough <scottb@netwinder.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" +#include "softfloat.h" +#include "fpopcode.h" + +float32 float32_exp(float32 Fm); +float32 float32_ln(float32 Fm); +float32 float32_sin(float32 rFm); +float32 float32_cos(float32 rFm); +float32 float32_arcsin(float32 rFm); +float32 float32_arctan(float32 rFm); +float32 float32_log(float32 rFm); +float32 float32_tan(float32 rFm); +float32 float32_arccos(float32 rFm); +float32 float32_pow(float32 rFn,float32 rFm); +float32 float32_pol(float32 rFn,float32 rFm); + +unsigned int SingleCPDO(const unsigned int opcode) +{ + FPA11 *fpa11 = GET_FPA11(); + float32 rFm, rFn = 0; //FIXME - should be zero? + unsigned int Fd, Fm, Fn, nRc = 1; + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getSingleConstant(Fm); + } + else + { + switch (fpa11->fType[Fm]) + { + case typeSingle: + rFm = fpa11->fpreg[Fm].fSingle; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fType[Fn]) + { + case typeSingle: + rFn = fpa11->fpreg[Fn].fSingle; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fSingle = float32_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fSingle = float32_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fSingle = float32_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fSingle = rFm; + break; + + case MNF_CODE: + rFm ^= 0x80000000; + fpa11->fpreg[Fd].fSingle = rFm; + break; + + case ABS_CODE: + rFm &= 0x7fffffff; + fpa11->fpreg[Fd].fSingle = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fSingle = float32_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fSingle = float32_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fSingle = float32_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fSingle = float32_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fSingle = float32_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fSingle = float32_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fSingle = float32_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fSingle = float32_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fSingle = float32_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fType[Fd] = typeSingle; + return nRc; +} + +#if 0 +float32 float32_exp(float32 Fm) +{ +//series +} + +float32 float32_ln(float32 Fm) +{ +//series +} + +float32 float32_sin(float32 rFm) +{ +//series +} + +float32 float32_cos(float32 rFm) +{ +//series +} + +float32 float32_arcsin(float32 rFm) +{ +//series +} + +float32 float32_arctan(float32 rFm) +{ + //series +} + +float32 float32_arccos(float32 rFm) +{ + //return float32_sub(halfPi,float32_arcsin(rFm)); +} + +float32 float32_log(float32 rFm) +{ + return float32_div(float32_ln(rFm),getSingleConstant(7)); +} + +float32 float32_tan(float32 rFm) +{ + return float32_div(float32_sin(rFm),float32_cos(rFm)); +} + +float32 float32_pow(float32 rFn,float32 rFm) +{ + return float32_exp(float32_mul(rFm,float32_ln(rFn))); +} + +float32 float32_pol(float32 rFn,float32 rFm) +{ + return float32_arctan(float32_div(rFn,rFm)); +} +#endif diff --git a/arch/arm26/nwfpe/softfloat-macros b/arch/arm26/nwfpe/softfloat-macros new file mode 100644 index 000000000000..5469989f2c5e --- /dev/null +++ b/arch/arm26/nwfpe/softfloat-macros @@ -0,0 +1,740 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 32, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +{ + bits32 z; + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; +} + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 64, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +{ + bits64 z; + + __asm__("@shift64RightJamming -- start"); + if ( count == 0 ) { + z = a; + } + else if ( count < 64 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + __asm__("@shift64RightJamming -- end"); + *zPtr = z; +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +_plus_ the number of bits given in `count'. The shifted result is at most +64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +bits shifted off form a second 64-bit result as follows: The _last_ bit +shifted off is the most-significant bit of the extra result, and the other +63 bits of the extra result are all zero if and only if _all_but_the_last_ +bits shifted off were all zero. This extra result is stored in the location +pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0' and `a1' are considered to form a +fixed-point value with binary point between `a0' and `a1'. This fixed-point +value is shifted right by the number of bits given in `count', and the +integer part of the result is returned at the location pointed to by +`z0Ptr'. The fractional part of the result may be slightly corrupted as +described above, and is returned at the location pointed to by `z1Ptr'.) +------------------------------------------------------------------------------- +*/ +INLINE void + shift64ExtraRightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1 != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' can be arbitrarily large; in particular, if `count' is greater +than 128, the result will be 0. The result is broken into two 64-bit pieces +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128Right( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. If any nonzero bits are shifted off, they +are ``jammed'' into the least significant bit of the result by setting the +least significant bit to 1. The value of `count' can be arbitrarily large; +in particular, if `count' is greater than 128, the result will be either 0 +or 1, depending on whether the concatenation of `a0' and `a1' is zero or +nonzero. The result is broken into two 64-bit pieces which are stored at +the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128RightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 128 ) { + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right +by 64 _plus_ the number of bits given in `count'. The shifted result is +at most 128 nonzero bits; these are broken into two 64-bit pieces which are +stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted +off form a third 64-bit result as follows: The _last_ bit shifted off is +the most-significant bit of the extra result, and the other 63 bits of the +extra result are all zero if and only if _all_but_the_last_ bits shifted off +were all zero. This extra result is stored in the location pointed to by +`z2Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0', `a1', and `a2' are considered +to form a fixed-point value with binary point between `a1' and `a2'. This +fixed-point value is shifted right by the number of bits given in `count', +and the integer part of the result is returned at the locations pointed to +by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly +corrupted as described above, and is returned at the location pointed to by +`z2Ptr'.) +------------------------------------------------------------------------------- +*/ +INLINE void + shift128ExtraRightJamming( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z2 = a2; + z1 = a1; + z0 = a0; + } + else { + if ( count < 64 ) { + z2 = a1<<negCount; + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 128 ) { + z2 = a0<<negCount; + z1 = a0>>( count & 63 ); + } + else { + z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' must be less than 64. The result is broken into two 64-bit +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift128Left( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1<<count; + *z0Ptr = + ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); + +} + +/* +------------------------------------------------------------------------------- +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left +by the number of bits given in `count'. Any bits shifted off are lost. +The value of `count' must be less than 64. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift192Left( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount; + + z2 = a2<<count; + z1 = a1<<count; + z0 = a0<<count; + if ( 0 < count ) { + negCount = ( ( - count ) & 63 ); + z1 |= a2>>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +any carry out is lost. The result is broken into two 64-bit pieces which +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/* +------------------------------------------------------------------------------- +Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +modulo 2^192, so any carry out is lost. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +2^128, so any borrow out (carry out) is lost. The result is broken into two +64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +`z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The +result is broken into three 64-bit pieces which are stored at the locations +pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +into two 64-bit pieces which are stored at the locations pointed to by +`z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits32 aHigh, aLow, bHigh, bLow; + bits64 z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>32; + bLow = b; + bHigh = b>>32; + z1 = ( (bits64) aLow ) * bLow; + zMiddleA = ( (bits64) aLow ) * bHigh; + zMiddleB = ( (bits64) aHigh ) * bLow; + z0 = ( (bits64) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to +obtain a 192-bit product. The product is broken into three 64-bit pieces +which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +`z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul128By64To192( + bits64 a0, + bits64 a1, + bits64 b, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2, more1; + + mul64To128( a1, b, &z1, &z2 ); + mul64To128( a0, b, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +product. The product is broken into four 64-bit pieces which are stored at +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul128To256( + bits64 a0, + bits64 a1, + bits64 b0, + bits64 b1, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr, + bits64 *z3Ptr + ) +{ + bits64 z0, z1, z2, z3; + bits64 more1, more2; + + mul64To128( a1, b1, &z2, &z3 ); + mul64To128( a1, b0, &z1, &more2 ); + add128( z1, more2, 0, z2, &z1, &z2 ); + mul64To128( a0, b0, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + mul64To128( a0, b1, &more1, &more2 ); + add128( more1, more2, 0, z2, &more1, &z2 ); + add128( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 64-bit integer quotient obtained by dividing +`b' into the 128-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^63. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +unsigned integer is returned. +------------------------------------------------------------------------------- +*/ +static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +{ + bits64 b0, b1; + bits64 rem0, rem1, term0, term1; + bits64 z; + if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); + b0 = b>>32; + z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; + mul64To128( b, z, &term0, &term1 ); + sub128( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits64) rem0 ) < 0 ) { + z -= LIT64( 0x100000000 ); + b1 = b<<32; + add128( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<32 ) | ( rem1>>32 ); + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +`aExp' (the least significant bit) is 1, the integer returned approximates +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +case, the approximation returned lies strictly within +/-2 of the exact +value. +------------------------------------------------------------------------------- +*/ +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 index; + bits32 z; + + index = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + } + return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit +of `a'. If `a' is zero, 32 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit +of `a'. If `a' is zero, 64 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros64( bits64 a ) +{ + int8 shiftCount; + + shiftCount = 0; + if ( a < ( (bits64) 1 )<<32 ) { + shiftCount += 32; + } + else { + a >>= 32; + } + shiftCount += countLeadingZeros32( a ); + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +is equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +not equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/arch/arm26/nwfpe/softfloat-specialize b/arch/arm26/nwfpe/softfloat-specialize new file mode 100644 index 000000000000..acf409144763 --- /dev/null +++ b/arch/arm26/nwfpe/softfloat-specialize @@ -0,0 +1,366 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) +------------------------------------------------------------------------------- +*/ +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired. It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;'. + +ScottB: November 4, 1998 +Moved this function out of softfloat-specialize into fpmodule.c. +This effectively isolates all the changes required for integrating with the +Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying +fpmodule.c to integrate with the NetBSD kernel (I hope!). +------------------------------------------------------------------------------- +void float_raise( int8 flags ) +{ + float_exception_flags |= flags; +} +*/ + +/* +------------------------------------------------------------------------------- +Internal canonical NaN format. +------------------------------------------------------------------------------- +*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/* +------------------------------------------------------------------------------- +The pattern for a default generated single-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float32_default_nan 0xFFFFFFFF + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float32_is_nan( float32 a ) +{ + + return ( 0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float32 propagateFloat32NaN( float32 a, float32 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/* +------------------------------------------------------------------------------- +The pattern for a default generated double-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float64_is_nan( float64 a ) +{ + + return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float64ToCommonNaN( float64 a ) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>63; + z.low = 0; + z.high = a<<12; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the double- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float64 commonNaNToFloat64( commonNaNT a ) +{ + + return + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two double-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= LIT64( 0x0008000000000000 ); + b |= LIT64( 0x0008000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_signaling_nan( floatx80 a ) +{ + //register int lr; + bits64 aLow; + + //__asm__("mov %0, lr" : : "g" (lr)); + //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr); + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT floatx80ToCommonNaN( floatx80 a ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the extended +double-precision floating-point format. +------------------------------------------------------------------------------- +*/ +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two extended double-precision floating-point values `a' and `b', one +of which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif diff --git a/arch/arm26/nwfpe/softfloat.c b/arch/arm26/nwfpe/softfloat.c new file mode 100644 index 000000000000..26c1b916e527 --- /dev/null +++ b/arch/arm26/nwfpe/softfloat.c @@ -0,0 +1,3439 @@ +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include "fpa11.h" +#include "milieu.h" +#include "softfloat.h" + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode, extended double-precision rounding precision, +and exception flags. +------------------------------------------------------------------------------- +*/ +int8 float_rounding_mode = float_round_nearest_even; +int8 floatx80_rounding_precision = 80; +int8 float_exception_flags; + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) +------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +are propagated from function inputs to output. These details are target- +specific. +------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +/* +------------------------------------------------------------------------------- +Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +and 7, and returns the properly rounded 32-bit integer corresponding to the +input. If `zSign' is nonzero, the input is negated before being converted +to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point +input is simply rounded to an integer, with the inexact exception raised if +the input cannot be represented exactly as an integer. If the fixed-point +input is too large, however, the invalid exception is raised and the largest +positive or negative integer is returned. +------------------------------------------------------------------------------- +*/ +static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + int32 z; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_exception_flags |= float_flag_invalid; + return zSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +single-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ +#if 0 + float32 f; + __asm__("@ packFloat32; \n\ + mov %0, %1, asl #31; \n\ + orr %0, %2, asl #23; \n\ + orr %0, %3" + : /* no outputs */ + : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig) + : "cc"); + return f; +#else + return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; +#endif +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the single-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal single- +precision floating-point number. + The input significand `zSig' has its binary point between bits 30 +and 29, which is 7 bits to the left of the usual location. This shifted +significand must be normalized or smaller. If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (bits16) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat32' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat64Frac( float64 a ) +{ + + return a & LIT64( 0x000FFFFFFFFFFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat64Exp( float64 a ) +{ + + return ( a>>52 ) & 0x7FF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat64Sign( float64 a ) +{ + + return a>>63; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal double-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ) - 11; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +double-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + + return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the double-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal double- +precision floating-point number. + The input significand `zSig' has its binary point between bits 62 +and 61, which is 10 bits to the left of the usual location. This shifted +significand must be normalized or smaller. If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int16 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x200; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x3FF; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (bits16) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) + ) { + //register int lr = __builtin_return_address(0); + //printk("roundAndPackFloat64 called from 0x%08x\n",lr); + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat64' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float64 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloatx80Frac( floatx80 a ) +{ + + return a.low; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int32 extractFloatx80Exp( floatx80 a ) +{ + + return a.high & 0x7FFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the extended double-precision floating-point value +`a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloatx80Sign( floatx80 a ) +{ + + return a.high>>15; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal extended double-precision floating-point value +represented by the denormalized significand `aSig'. The normalized exponent +and significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ); + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into an +extended double-precision floating-point value, returning the result. +------------------------------------------------------------------------------- +*/ +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +{ + floatx80 z; + + z.low = zSig; + z.high = ( ( (bits16) zSign )<<15 ) + zExp; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and extended significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. Ordinarily, the abstract value is +rounded and packed into the extended double-precision format, with the +inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal extended +double-precision floating-point number. + If `roundingPrecision' is 32 or 64, the result is rounded to the same +number of bits as single or double precision, respectively. Otherwise, the +result is rounded to the full precision of the extended double-precision +format. + The input significand must be normalized or smaller. If the input +significand is not normalized, `zExp' must be 0; in that case, the result +returned is a subnormal number, and it must not require rounding. The +handling of underflow and overflow follows the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 + roundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + int64 roundIncrement, roundMask, roundBits; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( (sbits64) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + increment = ( (sbits64) zSig1 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise( float_flag_overflow | float_flag_inexact ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ! increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( roundNearestEven ) { + increment = ( (sbits64) zSig1 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + if ( (sbits64) zSig0 < 0 ) zExp = 1; + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + + return packFloatx80( zSign, zExp, zSig0 ); +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent +`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. This routine is just like +`roundAndPackFloatx80' except that the input significand does not have to be +normalized. +------------------------------------------------------------------------------- +*/ +static floatx80 + normalizeRoundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 shiftCount; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return + roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the single-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 a ) +{ + flag zSign; + + if ( a == 0 ) return 0; + if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the double-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 int32_to_float64( int32 a ) +{ + flag aSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return 0; + aSign = ( a < 0 ); + absA = aSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' +to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 int32_to_floatx80( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= 0x00800000; + shiftCount = 0xAF - aExp; + zSig = aSig; + zSig <<= 32; + if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); + return roundAndPackInt32( aSign, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( a == 0xCF000000 ) return 0x80000000; + float_raise( float_flag_invalid ); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + return 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + float_exception_flags |= float_flag_inexact; + } + return aSign ? - z : z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float32_to_floatx80( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + aSig |= 0x00800000; + return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the single-precision floating-point value `a' to an integer, and +returns the result as a single-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float32 z; + + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN( a, a ); + } + return a; + } + if ( aExp <= 0x7E ) { + if ( (bits32) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat32Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_down: + return aSign ? 0xBF800000 : 0; + case float_round_up: + return aSign ? 0x80000000 : 0x3F800000; + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the single-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig; + bits64 zSig64; + bits32 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + zSig = zSig64; + if ( 0 <= (sbits32) ( zSig<<1 ) ) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = ( ( (bits64) aSig )<<32 ) / bSig; + if ( ( zSig & 0x3F ) == 0 ) { + zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig; + bits32 q; + bits64 aSig64, bSig64, q64; + bits32 alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig |= 0x00800000; + bSig |= 0x00800000; + if ( expDiff < 32 ) { + aSig <<= 8; + bSig <<= 8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + if ( 0 < expDiff ) { + q = ( ( (bits64) aSig )<<32 ) / bSig; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + } + else { + if ( bSig <= aSig ) aSig -= bSig; + aSig64 = ( (bits64) aSig )<<40; + bSig64 = ( (bits64) bSig )<<40; + expDiff -= 64; + while ( 0 < expDiff ) { + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + aSig64 = - ( ( bSig * q64 )<<38 ); + expDiff -= 62; + } + expDiff += 64; + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + q = q64>>( 64 - expDiff ); + bSig <<= 6; + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig; + bits64 rem, term; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! aSign ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0xFFFFFFFF; + } + else { + aSig >>= aExp & 1; + term = ( (bits64) zSig ) * zSig; + rem = ( ( (bits64) aSig )<<32 ) - term; + while ( (sbits64) rem < 0 ) { + --zSig; + rem += ( ( (bits64) zSig )<<1 ) | 1; + } + zSig |= ( rem != 0 ); + } + } + shift32RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat32( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq_signaling( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement unsigned integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = 0; //extractFloat64Sign( a ); + //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + bits32 zSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float64_to_floatx80( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, and +returns the result as a double-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + if ( aExp <= 0x3FE ) { + if ( (bits64) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_down: + return aSign ? LIT64( 0xBFF0000000000000 ) : 0; + case float_round_up: + return + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits64) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_mul( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the double-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_div( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + bits64 rem0, rem1; + bits64 term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the double-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits64 aSig, bSig; + bits64 q, alternateASig; + sbits64 sigMean; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits64) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits64) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sqrt( float64 a ) +{ + flag aSign; + int16 aExp, zExp; + bits64 aSig, zSig; + bits64 rem0, rem1, term0, term1; //, shiftedRem; + //float64 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, a ); + if ( ! aSign ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + zSig <<= 31; + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2; + if ( ( zSig & 0x3FF ) <= 5 ) { + if ( zSig < 2 ) { + zSig = LIT64( 0xFFFFFFFFFFFFFFFF ); + } + else { + aSig <<= 2; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + shortShift128Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + } + shift64RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat64( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic---which means in particular that the conversion +is rounded according to the current rounding mode. If `a' is a NaN, the +largest positive integer is returned. Otherwise, if the conversion +overflows, the largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic, except that the conversion is always rounded +toward zero. If `a' is a NaN, the largest positive integer is returned. +Otherwise, if the conversion overflows, the largest integer with the same +sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32_round_to_zero( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount < 32 ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( 63 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the single-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 floatx80_to_float32( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32( aSign, aExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the double-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 floatx80_to_float64( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig, zSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64( aSign, aExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Rounds the extended double-precision floating-point value `a' to an integer, +and returns the result as an extended quadruple-precision floating-point +value. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + floatx80 z; + + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN( a, a ); + } + return a; + } + if ( aExp <= 0x3FFE ) { + if ( ( aExp == 0 ) + && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + return a; + } + float_exception_flags |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.low += lastBitMask>>1; + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z.low += roundBitsMask; + } + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the extended double- +precision floating-point values `a' and `b'. If `zSign' is true, the sum is +negated before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b ); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; + } + zExp = aExp; + goto shiftRight1; + } + + zSig0 = aSig + bSig; + + if ( (sbits64) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the extended +double-precision floating-point values `a' and `b'. If `zSign' is true, +the difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b ); + } + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + zSig1 = 0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return + normalizeRoundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the extended double-precision floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_add( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs( a, b, aSign ); + } + else { + return subFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sub( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs( a, b, aSign ); + } + else { + return addFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_mul( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (sbits64) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the extended double-precision floating-point +value `a' by the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_div( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + bits64 rem0, rem1, rem2, term0, term1, term2; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (bits64) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the extended double-precision floating-point value +`a' with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_rem( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig; + bits64 q, term0, term1, alternateASig0, alternateASig1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( (bits64) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the extended double-precision floating-point +value `a'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sqrt( floatx80 a ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + bits64 shiftedRem0, shiftedRem1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + zSig0 <<= 31; + aSig1 = 0; + shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; + if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); + shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 ); + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + shortShift128Left( 0, zSig0, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 ); + zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); + if ( (bits64) ( zSig1<<1 ) <= 10 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( zSig0, zSig1, &term1, &term2 ); + shortShift128Left( term1, term2, 1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); + term3 |= 1; + add192( + rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than or equal to the corresponding value `b', and 0 otherwise. The +comparison is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is equal +to the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq_signaling( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs +do not cause an exception. Otherwise, the comparison is performed according +to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause +an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + diff --git a/arch/arm26/nwfpe/softfloat.h b/arch/arm26/nwfpe/softfloat.h new file mode 100644 index 000000000000..22c2193a4997 --- /dev/null +++ b/arch/arm26/nwfpe/softfloat.h @@ -0,0 +1,232 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#ifndef __SOFTFLOAT_H__ +#define __SOFTFLOAT_H__ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. +------------------------------------------------------------------------------- +*/ +#define FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. +------------------------------------------------------------------------------- +*/ +typedef unsigned long int float32; +typedef unsigned long long float64; +typedef struct { + unsigned short high; + unsigned long long low; +} floatx80; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_detect_tininess; +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_rounding_mode; +enum { + float_round_nearest_even = 0, + float_round_to_zero = 1, + float_round_down = 2, + float_round_up = 3 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. +------------------------------------------------------------------------------- +extern signed char float_exception_flags; +enum { + float_flag_inexact = 1, + float_flag_underflow = 2, + float_flag_overflow = 4, + float_flag_divbyzero = 8, + float_flag_invalid = 16 +}; + +ScottB: November 4, 1998 +Changed the enumeration to match the bit order in the FPA11. +*/ + +extern signed char float_exception_flags; +enum { + float_flag_invalid = 1, + float_flag_divbyzero = 2, + float_flag_overflow = 4, + float_flag_underflow = 8, + float_flag_inexact = 16 +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. +------------------------------------------------------------------------------- +*/ +void float_raise( signed char ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( signed int ); +float64 int32_to_float64( signed int ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( signed int ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float32_to_int32( float32 ); +signed int float32_to_int32_round_to_zero( float32 ); +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +char float32_eq( float32, float32 ); +char float32_le( float32, float32 ); +char float32_lt( float32, float32 ); +char float32_eq_signaling( float32, float32 ); +char float32_le_quiet( float32, float32 ); +char float32_lt_quiet( float32, float32 ); +char float32_is_signaling_nan( float32 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float64_to_int32( float64 ); +signed int float64_to_int32_round_to_zero( float64 ); +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +char float64_eq( float64, float64 ); +char float64_le( float64, float64 ); +char float64_lt( float64, float64 ); +char float64_eq_signaling( float64, float64 ); +char float64_le_quiet( float64, float64 ); +char float64_lt_quiet( float64, float64 ); +char float64_is_signaling_nan( float64 ); + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int floatx80_to_int32( floatx80 ); +signed int floatx80_to_int32_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern signed char floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +char floatx80_eq( floatx80, floatx80 ); +char floatx80_le( floatx80, floatx80 ); +char floatx80_lt( floatx80, floatx80 ); +char floatx80_eq_signaling( floatx80, floatx80 ); +char floatx80_le_quiet( floatx80, floatx80 ); +char floatx80_lt_quiet( floatx80, floatx80 ); +char floatx80_is_signaling_nan( floatx80 ); + +#endif + +#endif |