From e55c57e0b51c68d78845549505057169c6c3cba6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 24 Jun 2005 20:11:03 -0700
Subject: [SPARC64]: Report any user access faults in termios accessors.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-sparc64/termios.h | 78 +++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/asm-sparc64/termios.h b/include/asm-sparc64/termios.h
index 8effce0da087..9777a9cca88a 100644
--- a/include/asm-sparc64/termios.h
+++ b/include/asm-sparc64/termios.h
@@ -100,16 +100,17 @@ struct winsize {
 #define user_termio_to_kernel_termios(termios, termio) \
 ({ \
 	unsigned short tmp; \
-	get_user(tmp, &(termio)->c_iflag); \
+	int err; \
+	err = get_user(tmp, &(termio)->c_iflag); \
 	(termios)->c_iflag = (0xffff0000 & ((termios)->c_iflag)) | tmp; \
-	get_user(tmp, &(termio)->c_oflag); \
+	err |= get_user(tmp, &(termio)->c_oflag); \
 	(termios)->c_oflag = (0xffff0000 & ((termios)->c_oflag)) | tmp; \
-	get_user(tmp, &(termio)->c_cflag); \
+	err |= get_user(tmp, &(termio)->c_cflag); \
 	(termios)->c_cflag = (0xffff0000 & ((termios)->c_cflag)) | tmp; \
-	get_user(tmp, &(termio)->c_lflag); \
+	err |= get_user(tmp, &(termio)->c_lflag); \
 	(termios)->c_lflag = (0xffff0000 & ((termios)->c_lflag)) | tmp; \
-	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
-	0; \
+	err |= copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+	err; \
 })
 
 /*
@@ -119,53 +120,56 @@ struct winsize {
  */
 #define kernel_termios_to_user_termio(termio, termios) \
 ({ \
-	put_user((termios)->c_iflag, &(termio)->c_iflag); \
-	put_user((termios)->c_oflag, &(termio)->c_oflag); \
-	put_user((termios)->c_cflag, &(termio)->c_cflag); \
-	put_user((termios)->c_lflag, &(termio)->c_lflag); \
-	put_user((termios)->c_line,  &(termio)->c_line); \
-	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+	int err; \
+	err  = put_user((termios)->c_iflag, &(termio)->c_iflag); \
+	err |= put_user((termios)->c_oflag, &(termio)->c_oflag); \
+	err |= put_user((termios)->c_cflag, &(termio)->c_cflag); \
+	err |= put_user((termios)->c_lflag, &(termio)->c_lflag); \
+	err |= put_user((termios)->c_line,  &(termio)->c_line); \
+	err |= copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
 	if (!((termios)->c_lflag & ICANON)) { \
-		put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \
-		put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \
+		err |= put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \
+		err |= put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \
 	} \
-	0; \
+	err; \
 })
 
 #define user_termios_to_kernel_termios(k, u) \
 ({ \
-	get_user((k)->c_iflag, &(u)->c_iflag); \
-	get_user((k)->c_oflag, &(u)->c_oflag); \
-	get_user((k)->c_cflag, &(u)->c_cflag); \
-	get_user((k)->c_lflag, &(u)->c_lflag); \
-	get_user((k)->c_line,  &(u)->c_line); \
-	copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \
+	int err; \
+	err  = get_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= get_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= get_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= get_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= get_user((k)->c_line,  &(u)->c_line); \
+	err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \
 	if((k)->c_lflag & ICANON) { \
-		get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
-		get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+		err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
 	} else { \
-		get_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
-		get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+		err |= get_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
 	} \
-	0; \
+	err; \
 })
 
 #define kernel_termios_to_user_termios(u, k) \
 ({ \
-	put_user((k)->c_iflag, &(u)->c_iflag); \
-	put_user((k)->c_oflag, &(u)->c_oflag); \
-	put_user((k)->c_cflag, &(u)->c_cflag); \
-	put_user((k)->c_lflag, &(u)->c_lflag); \
-	put_user((k)->c_line, &(u)->c_line); \
-	copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \
+	int err; \
+	err  = put_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= put_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= put_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= put_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= put_user((k)->c_line, &(u)->c_line); \
+	err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \
 	if(!((k)->c_lflag & ICANON)) { \
-		put_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
-		put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+		err |= put_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
 	} else { \
-		put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
-		put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+		err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
 	} \
-	0; \
+	err; \
 })
 
 #endif	/* __KERNEL__ */
-- 
cgit v1.2.3


From 321ab6a5fab812658626aee6bce2617f8cfb3a55 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Sat, 25 Jun 2005 19:30:04 +0100
Subject: [PATCH] ARM: 2752/1: disable ixp2000 PCI I/O software workaround on
 chips that don't need it

Patch from Lennert Buytenhek

The later ixp2000 models don't need the PCI I/O workaround that we
currently perform.  Add a config option to disable the workaround,
and panic on boot if a kernel without the workaround is booted on a
buggy chip.  As only pre-production ixp2000s need the workaround,
the default is for it not to be configured in.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Deepak Saxena
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/arch-ixp2000/io.h           | 20 ++++++++++++++------
 include/asm-arm/arch-ixp2000/ixp2000-regs.h |  2 +-
 2 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-arm/arch-ixp2000/io.h b/include/asm-arm/arch-ixp2000/io.h
index 5e56b47446e0..3241cd6f0778 100644
--- a/include/asm-arm/arch-ixp2000/io.h
+++ b/include/asm-arm/arch-ixp2000/io.h
@@ -17,16 +17,21 @@
 
 #define IO_SPACE_LIMIT		0xffffffff
 #define __mem_pci(a)		(a)
-#define ___io(p)		((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE))
 
 /*
- * The IXP2400 before revision B0 asserts byte lanes for PCI I/O
+ * The A? revisions of the IXP2000s assert byte lanes for PCI I/O
  * transactions the other way round (MEM transactions don't have this
- * issue), so we need to override the standard functions.  B0 and later
- * have a bit that can be set to 1 to get the 'proper' behavior, but
- * since that isn't available on the A? revisions we just keep doing
- * things manually.
+ * issue), so if we want to support those models, we need to override
+ * the standard I/O functions.
+ *
+ * B0 and later have a bit that can be set to 1 to get the proper
+ * behavior for I/O transactions, which then allows us to use the
+ * standard I/O functions.  This is what we do if the user does not
+ * explicitly ask for support for pre-B0.
  */
+#ifdef CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO
+#define ___io(p)		((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE))
+
 #define alignb(addr)		(void __iomem *)((unsigned long)(addr) ^ 3)
 #define alignw(addr)		(void __iomem *)((unsigned long)(addr) ^ 2)
 
@@ -119,6 +124,9 @@
 #define ioport_map(port, nr)	___io(port)
 
 #define ioport_unmap(addr)
+#else
+#define __io(p)			((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE))
+#endif
 
 
 #ifdef CONFIG_ARCH_IXDP2X01
diff --git a/include/asm-arm/arch-ixp2000/ixp2000-regs.h b/include/asm-arm/arch-ixp2000/ixp2000-regs.h
index a1d9e181b10f..5eb47d4bfbf6 100644
--- a/include/asm-arm/arch-ixp2000/ixp2000-regs.h
+++ b/include/asm-arm/arch-ixp2000/ixp2000-regs.h
@@ -241,7 +241,7 @@
 #define PCI_CONTROL_BE_DEI		(1 << 21)	/* Big Endian Data Enable In  */
 #define PCI_CONTROL_BE_BEO		(1 << 20)	/* Big Endian Byte Enable Out */
 #define PCI_CONTROL_BE_BEI		(1 << 19)	/* Big Endian Byte Enable In  */
-#define PCI_CONTROL_PNR			(1 << 17)	/* PCI Not Reset bit */
+#define PCI_CONTROL_IEE			(1 << 17)	/* I/O cycle Endian swap Enable */
 
 #define IXP2000_PCI_RST_REL		(1 << 2)
 #define CFG_RST_DIR			(*IXP2000_PCI_CONTROL & IXP2000_PCICNTL_PCF)
-- 
cgit v1.2.3


From 8749af68216e1ebf6460992fce548f400ecf63a4 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 25 Jun 2005 19:39:45 +0100
Subject: [PATCH] ARM: Generic Dynamic Tick Timer support for ARM, take 4

This patch adds support for Dynamic Tick Timer for ARM. Dynamic Tick is
also known as VST (Variable Scheduling Timeouts).

Dynamic Tick has been in use in the OMAP tree since last October.  The
patch is not intrusive, and does not do anything unless CONFIG_NO_IDLE_HZ
is defined.  This patch has the following fixes based on comments from
RMK:
- Time is updated before calling interrupt handlers.
- Added new interrupt flag SA_TIMER to avoid duplicate timer interrupts
- Moved struct dyn_tick_timer to time.h until we at some point probably
  have an arch independent dyn-tick.h
- Cleaned up testing for DYN_TICK_ENABLED in irq.c

 I've cleaned up this patch to fix some remaining issues:
 - Call the timer tick handler with irqs disabled, as it would be from
   a normal interrupt
 - if we have a dyn_tick, we better implement all methods.
 - generic timer_dyn_reprogram() call, to be called before sleeping
 - added command line option - "dyntick=" to allow boot-time control
   of this feature
    -- rmk

Signed-off-by: Tony Lindgren
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/mach/time.h | 21 +++++++++++++++++++++
 include/asm-arm/signal.h    |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h
index 5cf4fd659fd5..047980ad18d1 100644
--- a/include/asm-arm/mach/time.h
+++ b/include/asm-arm/mach/time.h
@@ -39,8 +39,29 @@ struct sys_timer {
 	void			(*suspend)(void);
 	void			(*resume)(void);
 	unsigned long		(*offset)(void);
+
+#ifdef CONFIG_NO_IDLE_HZ
+	struct dyn_tick_timer	*dyn_tick;
+#endif
+};
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+#define DYN_TICK_SKIPPING	(1 << 2)
+#define DYN_TICK_ENABLED	(1 << 1)
+#define DYN_TICK_SUITABLE	(1 << 0)
+
+struct dyn_tick_timer {
+	unsigned int	state;			/* Current state */
+	int		(*enable)(void);	/* Enables dynamic tick */
+	int		(*disable)(void);	/* Disables dynamic tick */
+	void		(*reprogram)(unsigned long); /* Reprograms the timer */
+	int		(*handler)(int, void *, struct pt_regs *);
 };
 
+void timer_dyn_reprogram(void);
+#endif
+
 extern struct sys_timer *system_timer;
 extern void timer_tick(struct pt_regs *);
 
diff --git a/include/asm-arm/signal.h b/include/asm-arm/signal.h
index 46e69ae395af..760f6e65af05 100644
--- a/include/asm-arm/signal.h
+++ b/include/asm-arm/signal.h
@@ -114,6 +114,7 @@ typedef unsigned long sigset_t;
 #define SIGSTKSZ	8192
 
 #ifdef __KERNEL__
+#define SA_TIMER		0x40000000
 #define SA_IRQNOMASK		0x08000000
 #endif
 
-- 
cgit v1.2.3


From e70c9d5e61c6cb2272c866fc1303e62975006752 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Sat, 25 Jun 2005 14:54:25 -0700
Subject: [PATCH] I8K: use standard DMI interface

I8K: Change to use stock dmi infrastructure instead of homegrown
     parsing code. The driver now requires the box's DMI data to match
     a list of supported models so the driver can be safely compiled in
     by default without fear of it poking into random SMM BIOS
     code. DMI checks can be ignored with the i8k.ignore_dmi option.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/dmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index d2bcf556088b..5e93e6dce9a4 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -9,6 +9,7 @@ enum dmi_field {
 	DMI_SYS_VENDOR,
 	DMI_PRODUCT_NAME,
 	DMI_PRODUCT_VERSION,
+	DMI_PRODUCT_SERIAL,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
-- 
cgit v1.2.3


From b4819b593740a6d11db07b52e0fe35975b29a185 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
Date: Sat, 25 Jun 2005 14:54:31 -0700
Subject: [PATCH] mips: add MIPS-specific support for flatmem/discontigmem

2.6.12-git6 doesn't boot on some MIPS machines.  They need the support of flat
memory and discontig memory.

Signed-off-by: Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-mips/mmzone.h  | 4 ++++
 include/asm-mips/page.h    | 2 +-
 include/asm-mips/pgtable.h | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-mips/mmzone.h b/include/asm-mips/mmzone.h
index 29ee13be0b2a..d721143dbd47 100644
--- a/include/asm-mips/mmzone.h
+++ b/include/asm-mips/mmzone.h
@@ -8,6 +8,8 @@
 #include <asm/page.h>
 #include <mmzone.h>
 
+#ifdef CONFIG_DISCONTIGMEM
+
 #define kvaddr_to_nid(kvaddr)	pa_to_nid(__pa(kvaddr))
 #define pfn_to_nid(pfn)		pa_to_nid((pfn) << PAGE_SHIFT)
 
@@ -36,4 +38,6 @@
 /* XXX: FIXME -- wli */
 #define kern_addr_valid(addr)	(0)
 
+#endif /* CONFIG_DISCONTIGMEM */
+
 #endif /* _ASM_MMZONE_H_ */
diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h
index d1bf8240e73b..5cae35cd9ba9 100644
--- a/include/asm-mips/page.h
+++ b/include/asm-mips/page.h
@@ -127,7 +127,7 @@ static __inline__ int get_order(unsigned long size)
 
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 #define pfn_to_page(pfn)	(mem_map + (pfn))
 #define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h
index 878843203d67..e76ccd6e3a5d 100644
--- a/include/asm-mips/pgtable.h
+++ b/include/asm-mips/pgtable.h
@@ -350,7 +350,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	__update_cache(vma, address, pte);
 }
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 #define kern_addr_valid(addr)	(1)
 #endif
 
-- 
cgit v1.2.3


From 33d9e9b56d5ccd7776fdfe3ecce4a2793dee6fd3 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@freescale.com>
Date: Sat, 25 Jun 2005 14:54:37 -0700
Subject: [PATCH] ppc32: Add support for Freescale e200 (Book-E) core

The e200 core is a Book-E core (similar to e500) that has a unified L1 cache
and is not cache coherent on the bus.  The e200 core also adds a separate
exception level for debug exceptions.  Part of this patch helps to clean up a
few cases that are true for all Freescale Book-E parts, not just e500.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Kumar Gala <kumar.gala@freescale.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc/mmu.h         |  2 +-
 include/asm-ppc/mmu_context.h |  2 +-
 include/asm-ppc/ppc_asm.h     |  2 ++
 include/asm-ppc/reg.h         |  1 +
 include/asm-ppc/reg_booke.h   | 18 +++++++++++++++++-
 5 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-ppc/mmu.h b/include/asm-ppc/mmu.h
index d465aee1c82e..9205db404c7a 100644
--- a/include/asm-ppc/mmu.h
+++ b/include/asm-ppc/mmu.h
@@ -405,7 +405,7 @@ typedef struct _P601_BAT {
 
 #define MAS0_TLBSEL(x)	((x << 28) & 0x30000000)
 #define MAS0_ESEL(x)	((x << 16) & 0x0FFF0000)
-#define MAS0_NV		0x00000FFF
+#define MAS0_NV(x)	((x) & 0x00000FFF)
 
 #define MAS1_VALID 	0x80000000
 #define MAS1_IPROT	0x40000000
diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index 9222fa6ca172..ccabbce39d85 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -63,7 +63,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #define LAST_CONTEXT    	255
 #define FIRST_CONTEXT    	1
 
-#elif defined(CONFIG_E500)
+#elif defined(CONFIG_E200) || defined(CONFIG_E500)
 #define NO_CONTEXT      	256
 #define LAST_CONTEXT    	255
 #define FIRST_CONTEXT    	1
diff --git a/include/asm-ppc/ppc_asm.h b/include/asm-ppc/ppc_asm.h
index 13fa8e7483c1..f76221def484 100644
--- a/include/asm-ppc/ppc_asm.h
+++ b/include/asm-ppc/ppc_asm.h
@@ -174,6 +174,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 #define CLR_TOP32(r)
 #endif /* CONFIG_PPC64BRIDGE */
 
+#define RFCI		.long 0x4c000066	/* rfci instruction */
+#define RFDI		.long 0x4c00004e	/* rfdi instruction */
 #define RFMCI		.long 0x4c00004c	/* rfmci instruction */
 
 #ifdef CONFIG_IBM405_ERR77
diff --git a/include/asm-ppc/reg.h b/include/asm-ppc/reg.h
index c418aab7cd34..88b4222154d4 100644
--- a/include/asm-ppc/reg.h
+++ b/include/asm-ppc/reg.h
@@ -160,6 +160,7 @@
 #define HID0_ICFI	(1<<11)		/* Instr. Cache Flash Invalidate */
 #define HID0_DCI	(1<<10)		/* Data Cache Invalidate */
 #define HID0_SPD	(1<<9)		/* Speculative disable */
+#define HID0_DAPUEN	(1<<8)		/* Debug APU enable */
 #define HID0_SGE	(1<<7)		/* Store Gathering Enable */
 #define HID0_SIED	(1<<7)		/* Serial Instr. Execution [Disable] */
 #define HID0_DFCA	(1<<6)		/* Data Cache Flush Assist */
diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h
index 45c5e6f2b7ab..00ad9c754c78 100644
--- a/include/asm-ppc/reg_booke.h
+++ b/include/asm-ppc/reg_booke.h
@@ -165,6 +165,8 @@ do {						\
 #define SPRN_MCSRR1	0x23B	/* Machine Check Save and Restore Register 1 */
 #define SPRN_MCSR	0x23C	/* Machine Check Status Register */
 #define SPRN_MCAR	0x23D	/* Machine Check Address Register */
+#define SPRN_DSRR0	0x23E	/* Debug Save and Restore Register 0 */
+#define SPRN_DSRR1	0x23F	/* Debug Save and Restore Register 1 */
 #define SPRN_MAS0	0x270	/* MMU Assist Register 0 */
 #define SPRN_MAS1	0x271	/* MMU Assist Register 1 */
 #define SPRN_MAS2	0x272	/* MMU Assist Register 2 */
@@ -264,6 +266,17 @@ do {						\
 #define MCSR_BUS_IPERR 	0x00000002UL /* Instruction parity Error */
 #define MCSR_BUS_RPERR 	0x00000001UL /* Read parity Error */
 #endif
+#ifdef CONFIG_E200
+#define MCSR_MCP 	0x80000000UL /* Machine Check Input Pin */
+#define MCSR_CP_PERR 	0x20000000UL /* Cache Push Parity Error */
+#define MCSR_CPERR 	0x10000000UL /* Cache Parity Error */
+#define MCSR_EXCP_ERR 	0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn
+					fetch for an exception handler */
+#define MCSR_BUS_IRERR 	0x00000010UL /* Read Bus Error on instruction fetch*/
+#define MCSR_BUS_DRERR 	0x00000008UL /* Read Bus Error on data load */
+#define MCSR_BUS_WRERR 	0x00000004UL /* Write Bus Error on buffered
+					store or cache line push */
+#endif
 
 /* Bit definitions for the DBSR. */
 /*
@@ -311,6 +324,7 @@ do {						\
 #define ESR_ST		0x00800000	/* Store Operation */
 #define ESR_DLK		0x00200000	/* Data Cache Locking */
 #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
+#define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
 #define ESR_BO		0x00020000	/* Byte Ordering */
 
 /* Bit definitions related to the DBCR0. */
@@ -387,10 +401,12 @@ do {						\
 #define ICCR_CACHE	1		/* Cacheable */
 
 /* Bit definitions for L1CSR0. */
+#define L1CSR0_CLFC	0x00000100	/* Cache Lock Bits Flash Clear */
 #define L1CSR0_DCFI	0x00000002	/* Data Cache Flash Invalidate */
+#define L1CSR0_CFI	0x00000002	/* Cache Flash Invalidate */
 #define L1CSR0_DCE	0x00000001	/* Data Cache Enable */
 
-/* Bit definitions for L1CSR0. */
+/* Bit definitions for L1CSR1. */
 #define L1CSR1_ICLFR	0x00000100	/* Instr Cache Lock Bits Flash Reset */
 #define L1CSR1_ICFI	0x00000002	/* Instr Cache Flash Invalidate */
 #define L1CSR1_ICE	0x00000001	/* Instr Cache Enable */
-- 
cgit v1.2.3


From 3d9077afea4927e488282da7189de9159db20c17 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@freescale.com>
Date: Sat, 25 Jun 2005 14:54:39 -0700
Subject: [PATCH] ppc32: Remove FSL OCP support

Support for the OCP device model on Freescale (FSL) PPCs is no longer used.
All FSL PPCs that were using OCP have been converted to using the platform
device model.

Signed-off-by: Kumar Gala <kumar.gala@freescale.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc/fsl_ocp.h | 54 -----------------------------------------------
 include/asm-ppc/ocp.h     |  4 ----
 2 files changed, 58 deletions(-)
 delete mode 100644 include/asm-ppc/fsl_ocp.h

(limited to 'include')

diff --git a/include/asm-ppc/fsl_ocp.h b/include/asm-ppc/fsl_ocp.h
deleted file mode 100644
index 050fbba8d049..000000000000
--- a/include/asm-ppc/fsl_ocp.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * include/asm-ppc/fsl_ocp.h
- *
- * Definitions for the on-chip peripherals on Freescale PPC processors
- *
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
- *
- * Copyright 2004 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifdef __KERNEL__
-#ifndef __ASM_FS_OCP_H__
-#define __ASM_FS_OCP_H__
-
-/* A table of information for supporting the Gianfar Ethernet Controller
- * This helps identify which enet controller we are dealing with,
- * and what type of enet controller it is
- */
-struct ocp_gfar_data {
-	uint interruptTransmit;
-	uint interruptError;
-	uint interruptReceive;
-	uint interruptPHY;
-	uint flags;
-	uint phyid;
-	uint phyregidx;
-	unsigned char mac_addr[6];
-};
-
-/* Flags in the flags field */
-#define GFAR_HAS_COALESCE		0x20
-#define GFAR_HAS_RMON			0x10
-#define GFAR_HAS_MULTI_INTR		0x08
-#define GFAR_FIRM_SET_MACADDR		0x04
-#define GFAR_HAS_PHY_INTR		0x02	/* if not set use a timer */
-#define GFAR_HAS_GIGABIT		0x01
-
-/* Data structure for I2C support.  Just contains a couple flags
- * to distinguish various I2C implementations*/
-struct ocp_fs_i2c_data {
-	uint flags;
-};
-
-/* Flags for I2C */
-#define FS_I2C_SEPARATE_DFSRR	0x02
-#define FS_I2C_CLOCK_5200	0x01
-
-#endif	/* __ASM_FS_OCP_H__ */
-#endif	/* __KERNEL__ */
diff --git a/include/asm-ppc/ocp.h b/include/asm-ppc/ocp.h
index c726f1845190..983116f59d90 100644
--- a/include/asm-ppc/ocp.h
+++ b/include/asm-ppc/ocp.h
@@ -202,10 +202,6 @@ static DEVICE_ATTR(name##_##field, S_IRUGO, show_##name##_##field, NULL);
 #include <asm/ibm_ocp.h>
 #endif
 
-#ifdef CONFIG_FSL_OCP
-#include <asm/fsl_ocp.h>
-#endif
-
 #endif				/* CONFIG_PPC_OCP */
 #endif				/* __OCP_H__ */
 #endif				/* __KERNEL__ */
-- 
cgit v1.2.3


From f370513640492641b4046bfd9a6e4714f6ae530d Mon Sep 17 00:00:00 2001
From: Zwane Mwaikambo <zwane@linuxpower.ca>
Date: Sat, 25 Jun 2005 14:54:50 -0700
Subject: [PATCH] i386 CPU hotplug

(The i386 CPU hotplug patch provides infrastructure for some work which Pavel
is doing as well as for ACPI S3 (suspend-to-RAM) work which Li Shaohua
<shaohua.li@intel.com> is doing)

The following provides i386 architecture support for safely unregistering and
registering processors during runtime, updated for the current -mm tree.  In
order to avoid dumping cpu hotplug code into kernel/irq/*, I dropped the
cpu_online check in do_IRQ() by modifying fixup_irqs().  The difference is
that on cpu offline, fixup_irqs() is called before we clear the cpu from
cpu_online_map, and there is a long delay in order to ensure that we never
have any queued external interrupts on the APICs.  There are additional
changes to s390 and ppc64 to account for this change.

1) Add CONFIG_HOTPLUG_CPU
2) disable local APIC timer on dead cpus.
3) Disable preempt around irq balancing to prevent CPUs going down.
4) Print irq stats for all possible cpus.
5) Debugging check for interrupts on offline cpus.
6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
7) play_dead() for offline cpus to spin inside.
8) Handle offline cpus set in flush_tlb_others().
9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
10) Implement __cpu_disable() and __cpu_die().
11) Enable local interrupts in cpu_enable() after fixup_irqs()
12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.

Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/cpu.h | 2 ++
 include/asm-i386/irq.h | 4 ++++
 include/asm-i386/smp.h | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h
index 002740b21951..e7252c216ca8 100644
--- a/include/asm-i386/cpu.h
+++ b/include/asm-i386/cpu.h
@@ -5,6 +5,7 @@
 #include <linux/cpu.h>
 #include <linux/topology.h>
 #include <linux/nodemask.h>
+#include <linux/percpu.h>
 
 #include <asm/node.h>
 
@@ -16,4 +17,5 @@ extern int arch_register_cpu(int num);
 extern void arch_unregister_cpu(int);
 #endif
 
+DECLARE_PER_CPU(int, cpu_state);
 #endif /* _ASM_I386_CPU_H_ */
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 05b9e61b0a72..e2d8bf23ad70 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -38,4 +38,8 @@ extern void release_vm86_irqs(struct task_struct *);
 extern int irqbalance_disable(char *str);
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern void fixup_irqs(cpumask_t map);
+#endif
+
 #endif /* _ASM_IRQ_H */
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 55ef31f66bbe..507f2fd39a6a 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -83,6 +83,9 @@ static __inline int logical_smp_processor_id(void)
 }
 
 #endif
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
 #endif /* !__ASSEMBLY__ */
 
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
-- 
cgit v1.2.3


From 67664c8f7e74def5adf66298a1245d82af72db2c Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:54:52 -0700
Subject: [PATCH] i386: Dont use IPI broadcast when using cpu hotplug.

This patch introduces a startup parameter no_broadcast.  When we enable
CONFIG_HOTPLUG_CPU, we don't want to use the broadcast shortcut, as it has
ill effects on an offline cpu.  If we issue a broadcast, the IPI is also
delivered to offline or partially-up cpus, causing stale IPIs to be handled,
which is a problem and can cause undesirable effects.

It also introduces a new startup cmdline option, no_ipi_broadcast, that can
be switched at cmdline if necessary.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/mach-default/mach_ipi.h | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/mach-default/mach_ipi.h b/include/asm-i386/mach-default/mach_ipi.h
index 6f2b17a20089..cc756a67cd63 100644
--- a/include/asm-i386/mach-default/mach_ipi.h
+++ b/include/asm-i386/mach-default/mach_ipi.h
@@ -4,11 +4,34 @@
 void send_IPI_mask_bitmask(cpumask_t mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
+extern int no_broadcast;
+
 static inline void send_IPI_mask(cpumask_t mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
 
+static inline void __local_send_IPI_allbutself(int vector)
+{
+	if (no_broadcast) {
+		cpumask_t mask = cpu_online_map;
+		int this_cpu = get_cpu();
+
+		cpu_clear(this_cpu, mask);
+		send_IPI_mask(mask, vector);
+		put_cpu();
+	} else
+		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void __local_send_IPI_all(int vector)
+{
+	if (no_broadcast)
+		send_IPI_mask(cpu_online_map, vector);
+	else
+		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
 static inline void send_IPI_allbutself(int vector)
 {
 	/*
@@ -18,13 +41,13 @@ static inline void send_IPI_allbutself(int vector)
 	if (!(num_online_cpus() > 1))
 		return;
 
-	__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+	__local_send_IPI_allbutself(vector);
 	return;
 }
 
 static inline void send_IPI_all(int vector)
 {
-	__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+	__local_send_IPI_all(vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
-- 
cgit v1.2.3


From 6fe940d6c300886de4ff1454d8ffd363172af433 Mon Sep 17 00:00:00 2001
From: Li Shaohua <shaohua.li@intel.com>
Date: Sat, 25 Jun 2005 14:54:53 -0700
Subject: [PATCH] sep initializing rework

Make SEP init per-cpu, so it is hotplug safe.

Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/processor.h | 2 ++
 include/asm-i386/smp.h       | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index c76c50e96225..6f0f93d0d417 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -691,5 +691,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
 #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
 
 extern unsigned long boot_option_idle_override;
+extern void enable_sep_cpu(void);
+extern int sysenter_setup(void);
 
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 507f2fd39a6a..2451ead0ca5c 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -42,6 +42,8 @@ extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
 extern void smp_invalidate_rcv(void);		/* Process an NMI */
 extern void (*mtrr_hook) (void);
 extern void zap_low_mappings (void);
+extern void lock_ipi_call_lock(void);
+extern void unlock_ipi_call_lock(void);
 
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
-- 
cgit v1.2.3


From e1367daf3eed5cd619ee88c9907e1e6ddaa58406 Mon Sep 17 00:00:00 2001
From: Li Shaohua <shaohua.li@intel.com>
Date: Sat, 25 Jun 2005 14:54:56 -0700
Subject: [PATCH] cpu state clean after hot remove

Clean CPU states in order to reuse smp boot code for CPU hotplug.

Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/irq.h | 2 ++
 include/asm-i386/smp.h | 8 ++++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index e2d8bf23ad70..270f1986b19f 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -29,9 +29,11 @@ extern void release_vm86_irqs(struct task_struct *);
 
 #ifdef CONFIG_4KSTACKS
   extern void irq_ctx_init(int cpu);
+  extern void irq_ctx_exit(int cpu);
 # define __ARCH_HAS_DO_SOFTIRQ
 #else
 # define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
 #endif
 
 #ifdef CONFIG_IRQBALANCE
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 2451ead0ca5c..c9996eda5408 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -48,6 +48,14 @@ extern void unlock_ipi_call_lock(void);
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern void cpu_exit_clear(void);
+extern void cpu_uninit(void);
+
+#define __HAVE_ARCH_SMP_PREPARE_CPU
+extern int smp_prepare_cpu(int cpu);
+#endif
+
 /*
  * This function is needed by all SMP systems. It must _always_ be valid
  * from the initial startup. We map APIC_BASE very early in page_setup(),
-- 
cgit v1.2.3


From 52a119feaad92d44a0e97d01b22afbcbaf3fc079 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:54:57 -0700
Subject: [PATCH] make smp_prepare_cpu to a weak function

I really wish smp_prepare_cpu() would disappear eventually.  In the interim
this is ideally a weak function, so we don't end up changing several places
to define this dummy in headers.

Today, since the dummy declaration is done only in drivers/base/cpu.c but
the function is called in kernel/power/smp.c, I get an undefined reference
in my cpu hotplug code for x86_64 under development.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/smp.h | 3 ---
 include/linux/cpu.h    | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index c9996eda5408..edad9b4712fa 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -51,9 +51,6 @@ extern u8 x86_cpu_to_apicid[];
 #ifdef CONFIG_HOTPLUG_CPU
 extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
-
-#define __HAVE_ARCH_SMP_PREPARE_CPU
-extern int smp_prepare_cpu(int cpu);
 #endif
 
 /*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index fe0298e5dae1..e8904c0da686 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,6 +69,7 @@ extern struct semaphore cpucontrol;
 	register_cpu_notifier(&fn##_nb);			\
 }
 int cpu_down(unsigned int cpu);
+extern int __attribute__((weak)) smp_prepare_cpu(int cpu);
 #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 #else
 #define lock_cpu_hotplug()	do { } while (0)
-- 
cgit v1.2.3


From e6982c671c560da4a0bc5c908cbcbec12bd5991d Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:54:58 -0700
Subject: [PATCH] x86_64: Change init sections for CPU hotplug support

This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.

Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen.  Much of the generic hotplug code in the kernel, and all of the
other archs that support CPU hotplug today (i386, ia64, ppc64, s390 and
parisc), don't mark sections with __cpuinit, but only mark them as __devinit
and __devinitdata.

If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break on other archs that don't use __cpuinit
and __cpudevinit.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/init.h b/include/linux/init.h
index 05c83e0521ca..59008c3826cf 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -229,6 +229,18 @@ void __init parse_early_param(void);
 #define __devexitdata __exitdata
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+#define __cpuinit
+#define __cpuinitdata
+#define __cpuexit
+#define __cpuexitdata
+#else
+#define __cpuinit	__init
+#define __cpuinitdata __initdata
+#define __cpuexit __exit
+#define __cpuexitdata	__exitdata
+#endif
+
 /* Functions marked as __devexit may be discarded at kernel link time, depending
    on config options.  Newer versions of binutils detect references from
    retained sections to discarded sections and flag an error.  Pointers to
-- 
cgit v1.2.3


From 76e4f660d9f4c6d1bb473f72be2988c35eaca948 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:55:00 -0700
Subject: [PATCH] x86_64: CPU hotplug support

  Experimental CPU hotplug patch for x86_64
  -----------------------------------------
This supports logical CPU online and offline.
- Test with maxcpus=1, and then kick other cpu's off to test if init code
  is all cleaned up. CONFIG_SCHED_SMT works as well.
- idle threads are forked on demand from keventd threads for clean startup

TBD:
1. Not tested on a real NUMA machine (tested with numa=fake=2)
2. Handle ACPI pieces for physical hotplug support.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Shaohua.li<shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/irq.h | 5 +++++
 include/asm-x86_64/smp.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h
index 3af50b3c3b05..eb3b7aa9eb9f 100644
--- a/include/asm-x86_64/irq.h
+++ b/include/asm-x86_64/irq.h
@@ -52,4 +52,9 @@ struct irqaction;
 struct pt_regs;
 int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <linux/cpumask.h>
+extern void fixup_irqs(cpumask_t map);
+#endif
+
 #endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index a7425aa5a3b7..9c6242fb99db 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -77,6 +77,8 @@ extern __inline int hard_smp_processor_id(void)
 }
 
 extern int safe_smp_processor_id(void);
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
 
 #endif /* !ASSEMBLY */
 
-- 
cgit v1.2.3


From 884d9e40b4089014f40c49e86ac6505842db2b53 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:55:02 -0700
Subject: [PATCH] x86_64: Dont use broadcast shortcut to make it cpu hotplug
 safe.

Broadcast IPIs cause unexpected behaviour for cpu hotplug.  CPUs in the
offline state also end up receiving the IPI.  Once the cpus come online they
receive these stale IPIs, which are bad and introduce unexpected behaviour.

This is easily avoided by not sending a broadcast and addressing just the
CPUs in the online map.  Doing prelim cycle counts it appears there is no big
overhead and numbers seem around 0x3000-0x3900 on average on x86 and x86_64
systems with CPUs running at 3G, both for the broadcast and mask versions of
the APIs.

The shortcuts are useful only for flat mode (where the perf shows no
degradation), and in cluster mode, it's unicast anyway.  It's simpler to just
not use broadcast anymore.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/smp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 9c6242fb99db..aeb1b73e21e1 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -43,6 +43,8 @@ extern cpumask_t cpu_callout_map;
 extern void smp_alloc_memory(void);
 extern volatile unsigned long smp_invalidate_needed;
 extern int pic_mode;
+extern void lock_ipi_call_lock(void);
+extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_flush_tlb(void);
 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
-- 
cgit v1.2.3


From 5a72e04df5470df0ec646029d31e5528167ab1a7 Mon Sep 17 00:00:00 2001
From: Li Shaohua <shaohua.li@intel.com>
Date: Sat, 25 Jun 2005 14:55:06 -0700
Subject: [PATCH] suspend/resume SMP support

Using CPU hotplug to support suspend/resume SMP.  Both S3 and S4 use
disable/enable_nonboot_cpus API.  The S4 part is based on Pavel's original S4
SMP patch.

Signed-off-by: Li Shaohua<shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2bf0d5fabcdb..f2e96fdfaae0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -58,7 +58,7 @@ static inline int software_suspend(void)
 }
 #endif
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SUSPEND_SMP
 extern void disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
-- 
cgit v1.2.3


From 620b03276488c3cf103caf1e326bd21f00d3df84 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Sat, 25 Jun 2005 14:55:11 -0700
Subject: [PATCH] properly stop devices before poweroff

Without this patch, Linux provokes emergency disk shutdowns and
similar nastiness. It was in SuSE kernels for some time, IIRC.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pm.h | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index ed2b76e75199..14479325e3f3 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -103,7 +103,8 @@ extern int pm_active;
 /*
  * Register a device with power management
  */
-struct pm_dev __deprecated *pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
+struct pm_dev __deprecated *
+pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
 
 /*
  * Unregister a device with power management
@@ -190,17 +191,18 @@ typedef u32 __bitwise pm_message_t;
 /*
  * There are 4 important states driver can be in:
  * ON     -- driver is working
- * FREEZE -- stop operations and apply whatever policy is applicable to a suspended driver
- *           of that class, freeze queues for block like IDE does, drop packets for
- *           ethernet, etc... stop DMA engine too etc... so a consistent image can be
- *           saved; but do not power any hardware down.
- * SUSPEND - like FREEZE, but hardware is doing as much powersaving as possible. Roughly
- *           pci D3.
+ * FREEZE -- stop operations and apply whatever policy is applicable to a
+ *           suspended driver of that class, freeze queues for block like IDE
+ *           does, drop packets for ethernet, etc... stop DMA engine too etc...
+ *           so a consistent image can be saved; but do not power any hardware
+ *           down.
+ * SUSPEND - like FREEZE, but hardware is doing as much powersaving as
+ *           possible. Roughly pci D3.
  *
- * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 (SUSPEND).
- * We'll need to fix the drivers. So yes, putting 3 to all diferent defines is intentional,
- * and will go away as soon as drivers are fixed. Also note that typedef is neccessary,
- * we'll probably want to switch to
+ * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3
+ * (SUSPEND).  We'll need to fix the drivers. So yes, putting 3 to all different
+ * defines is intentional, and will go away as soon as drivers are fixed.  Also
+ * note that typedef is necessary, we'll probably want to switch to
  *   typedef struct pm_message_t { int event; int flags; } pm_message_t
  * or something similar soon.
  */
@@ -222,11 +224,18 @@ struct dev_pm_info {
 
 extern void device_pm_set_parent(struct device * dev, struct device * parent);
 
-extern int device_suspend(pm_message_t state);
 extern int device_power_down(pm_message_t state);
 extern void device_power_up(void);
 extern void device_resume(void);
 
+#ifdef CONFIG_PM
+extern int device_suspend(pm_message_t state);
+#else
+static inline int device_suspend(pm_message_t state)
+{
+	return 0;
+}
+#endif
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 8d783b3e02002bce8cf9d4e4a82922ee7e59b1e5 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Sat, 25 Jun 2005 14:55:14 -0700
Subject: [PATCH] swsusp: clean assembly parts

This patch fixes register saving so that each register is only saved once,
and adds missing saving of %cr8 on x86-64.  Some reordering so that
save/restore is more logical/safer (segment registers should be restored
after gdt).

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/suspend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
index ec745807feae..bb9f40597d09 100644
--- a/include/asm-x86_64/suspend.h
+++ b/include/asm-x86_64/suspend.h
@@ -16,7 +16,7 @@ arch_prepare_suspend(void)
 struct saved_context {
   	u16 ds, es, fs, gs, ss;
 	unsigned long gs_base, gs_kernel_base, fs_base;
-	unsigned long cr0, cr2, cr3, cr4;
+	unsigned long cr0, cr2, cr3, cr4, cr8;
 	u16 gdt_pad;
 	u16 gdt_limit;
 	unsigned long gdt_base;
-- 
cgit v1.2.3


From 84dd8d7e9c080b4db66b00b8bc36ccf09a90f824 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sat, 25 Jun 2005 14:55:26 -0700
Subject: [PATCH] uml: add profile_pc for i386

Cope with a conditional i386 definition, which is wrong for UML.  Before we
just used that one, but it wasn't defined for CONFIG_SMP, so in that case
we got link errors.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-um/ptrace-i386.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/asm-um/ptrace-i386.h b/include/asm-um/ptrace-i386.h
index 04222f35c43e..fe882b9d917e 100644
--- a/include/asm-um/ptrace-i386.h
+++ b/include/asm-um/ptrace-i386.h
@@ -32,6 +32,10 @@
 #define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
 #define PT_FIX_EXEC_STACK(sp) do ; while(0)
 
+/* Cope with a conditional i386 definition. */
+#undef profile_pc
+#define profile_pc(regs) PT_REGS_IP(regs)
+
 #define user_mode(r) UPT_IS_USER(&(r)->regs)
 
 #endif
-- 
cgit v1.2.3


From 77fa22450de00d535de2cc8be653983560828000 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 25 Jun 2005 14:55:30 -0700
Subject: [PATCH] s390: improved machine check handling

Improved machine check handling.  Kernel is now able to receive machine checks
while in kernel mode (system call, interrupt and program check handling).
Also register validation is now performed.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/lowcore.h     |  7 +++++-
 include/asm-s390/processor.h   | 52 ++++++++++++++++--------------------------
 include/asm-s390/ptrace.h      |  2 +-
 include/asm-s390/system.h      | 21 ++++++++++++-----
 include/asm-s390/thread_info.h |  2 ++
 5 files changed, 44 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index df5172fc589d..76b5b19c0ae2 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -109,10 +109,14 @@
 
 #ifndef __s390x__
 #define __LC_PFAULT_INTPARM             0x080
+#define __LC_CPU_TIMER_SAVE_AREA        0x0D8
 #define __LC_AREGS_SAVE_AREA            0x120
+#define __LC_GPREGS_SAVE_AREA           0x180
 #define __LC_CREGS_SAVE_AREA            0x1C0
 #else /* __s390x__ */
 #define __LC_PFAULT_INTPARM             0x11B8
+#define __LC_GPREGS_SAVE_AREA           0x1280
+#define __LC_CPU_TIMER_SAVE_AREA        0x1328
 #define __LC_AREGS_SAVE_AREA            0x1340
 #define __LC_CREGS_SAVE_AREA            0x1380
 #endif /* __s390x__ */
@@ -167,7 +171,8 @@ struct _lowcore
 	__u16        subchannel_nr;            /* 0x0ba */
 	__u32        io_int_parm;              /* 0x0bc */
 	__u32        io_int_word;              /* 0x0c0 */
-        __u8         pad3[0xD8-0xC4];          /* 0x0c4 */
+        __u8         pad3[0xD4-0xC4];          /* 0x0c4 */
+	__u32        extended_save_area_addr;  /* 0x0d4 */
 	__u32        cpu_timer_save_area[2];   /* 0x0d8 */
 	__u32        clock_comp_save_area[2];  /* 0x0e0 */
 	__u32        mcck_interruption_code[2]; /* 0x0e8 */
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index fb46e9090b50..8bd14de69e35 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -206,6 +206,18 @@ unsigned long get_wchan(struct task_struct *p);
 	asm volatile ("ex 0,%0" : : "i" (__LC_DIAG44_OPCODE) : "memory")
 #endif /* __s390x__ */
 
+/*
+ * Set PSW to specified value.
+ */
+static inline void __load_psw(psw_t psw)
+{
+#ifndef __s390x__
+	asm volatile ("lpsw  0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+#else
+	asm volatile ("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+#endif
+}
+
 /*
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
@@ -214,8 +226,8 @@ unsigned long get_wchan(struct task_struct *p);
 static inline void __load_psw_mask (unsigned long mask)
 {
 	unsigned long addr;
-
 	psw_t psw;
+
 	psw.mask = mask;
 
 #ifndef __s390x__
@@ -241,30 +253,8 @@ static inline void __load_psw_mask (unsigned long mask)
  */
 static inline void enabled_wait(void)
 {
-	unsigned long reg;
-	psw_t wait_psw;
-
-	wait_psw.mask = PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT |
-		PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY;
-#ifndef __s390x__
-	asm volatile (
-		"    basr %0,0\n"
-		"0:  la   %0,1f-0b(%0)\n"
-		"    st   %0,4(%1)\n"
-		"    oi   4(%1),0x80\n"
-		"    lpsw 0(%1)\n"
-		"1:"
-		: "=&a" (reg) : "a" (&wait_psw), "m" (wait_psw)
-		: "memory", "cc" );
-#else /* __s390x__ */
-	asm volatile (
-		"    larl  %0,0f\n"
-		"    stg   %0,8(%1)\n"
-		"    lpswe 0(%1)\n"
-		"0:"
-		: "=&a" (reg) : "a" (&wait_psw), "m" (wait_psw)
-		: "memory", "cc" );
-#endif /* __s390x__ */
+	__load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT |
+			PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY);
 }
 
 /*
@@ -273,13 +263,11 @@ static inline void enabled_wait(void)
 
 static inline void disabled_wait(unsigned long code)
 {
-        char psw_buffer[2*sizeof(psw_t)];
         unsigned long ctl_buf;
-        psw_t *dw_psw = (psw_t *)(((unsigned long) &psw_buffer+sizeof(psw_t)-1)
-                                  & -sizeof(psw_t));
+        psw_t dw_psw;
 
-        dw_psw->mask = PSW_BASE_BITS | PSW_MASK_WAIT;
-        dw_psw->addr = code;
+        dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT;
+        dw_psw.addr = code;
         /* 
          * Store status and then load disabled wait psw,
          * the processor is dead afterwards
@@ -301,7 +289,7 @@ static inline void disabled_wait(unsigned long code)
                       "    oi    0x1c0,0x10\n" /* fake protection bit */
                       "    lpsw 0(%1)"
                       : "=m" (ctl_buf)
-		      : "a" (dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" );
+		      : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" );
 #else /* __s390x__ */
         asm volatile ("    stctg 0,0,0(%2)\n"
                       "    ni    4(%2),0xef\n" /* switch off protection */
@@ -333,7 +321,7 @@ static inline void disabled_wait(unsigned long code)
                       "    oi    0x384(1),0x10\n" /* fake protection bit */
                       "    lpswe 0(%1)"
                       : "=m" (ctl_buf)
-		      : "a" (dw_psw), "a" (&ctl_buf),
+		      : "a" (&dw_psw), "a" (&ctl_buf),
 		        "m" (dw_psw) : "cc", "0", "1");
 #endif /* __s390x__ */
 }
diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
index 4eff8f2e3bf1..fc7c96edc697 100644
--- a/include/asm-s390/ptrace.h
+++ b/include/asm-s390/ptrace.h
@@ -276,7 +276,7 @@ typedef struct
 #endif /* __s390x__ */
 
 #define PSW_KERNEL_BITS	(PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | \
-			 PSW_DEFAULT_KEY)
+			 PSW_MASK_MCHECK | PSW_DEFAULT_KEY)
 #define PSW_USER_BITS	(PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | \
 			 PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | \
 			 PSW_MASK_PSTATE | PSW_DEFAULT_KEY)
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 81514d76edcf..e3cb3ce1d24a 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -16,6 +16,7 @@
 #include <asm/types.h>
 #include <asm/ptrace.h>
 #include <asm/setup.h>
+#include <asm/processor.h>
 
 #ifdef __KERNEL__
 
@@ -331,9 +332,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 
 #ifdef __s390x__
 
-#define __load_psw(psw) \
-        __asm__ __volatile__("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
-
 #define __ctl_load(array, low, high) ({ \
 	typedef struct { char _[sizeof(array)]; } addrtype; \
 	__asm__ __volatile__ ( \
@@ -390,9 +388,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 
 #else /* __s390x__ */
 
-#define __load_psw(psw) \
-	__asm__ __volatile__("lpsw 0(%0)" : : "a" (&psw) : "cc" );
-
 #define __ctl_load(array, low, high) ({ \
 	typedef struct { char _[sizeof(array)]; } addrtype; \
 	__asm__ __volatile__ ( \
@@ -451,6 +446,20 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 /* For spinlocks etc */
 #define local_irq_save(x)	((x) = local_irq_disable())
 
+/*
+ * Use to set psw mask except for the first byte which
+ * won't be changed by this function.
+ */
+static inline void
+__set_psw_mask(unsigned long mask)
+{
+	local_save_flags(mask);
+	__load_psw_mask(mask);
+}
+
+#define local_mcck_enable()  __set_psw_mask(PSW_KERNEL_BITS)
+#define local_mcck_disable() __set_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK)
+
 #ifdef CONFIG_SMP
 
 extern void smp_ctl_set_bit(int cr, int bit);
diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h
index fe101d41e849..6c18a3f24316 100644
--- a/include/asm-s390/thread_info.h
+++ b/include/asm-s390/thread_info.h
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTART_SVC		4	/* restart svc with new svc number */
 #define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
 #define TIF_SINGLE_STEP		6	/* deliver sigtrap on return to user */
+#define TIF_MCCK_PENDING	7	/* machine check handling is pending */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling 
 					   TIF_NEED_RESCHED */
@@ -109,6 +110,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTART_SVC	(1<<TIF_RESTART_SVC)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
+#define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
-- 
cgit v1.2.3


From 6b979de395c7e1b7e59f74a870e1d1911853eccb Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <cborntra@de.ibm.com>
Date: Sat, 25 Jun 2005 14:55:32 -0700
Subject: [PATCH] s390: add vmcp interface

Add interface to issue VM control program commands.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/cpcmd.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-s390/cpcmd.h b/include/asm-s390/cpcmd.h
index 1d33c5da083e..1fcf65be7a23 100644
--- a/include/asm-s390/cpcmd.h
+++ b/include/asm-s390/cpcmd.h
@@ -11,14 +11,28 @@
 #define __CPCMD__
 
 /*
+ * the lowlevel function for cpcmd
  * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
  */
-extern void __cpcmd(char *cmd, char *response, int rlen);
+extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 
 #ifndef __s390x__
 #define cpcmd __cpcmd
 #else
-extern void cpcmd(char *cmd, char *response, int rlen);
+/*
+ * cpcmd is the in-kernel interface for issuing CP commands
+ *
+ * cmd:		null-terminated command string, max 240 characters
+ * response:	response buffer for VM's textual response
+ * rlen:	size of the response buffer, cpcmd will not exceed this size
+ *		but will cap the output if it's too large. Everything that
+ *		did not fit into the buffer will be silently dropped
+ * response_code: return pointer for VM's error code
+ * return value: the size of the response. The caller can check if the buffer
+ *		was large enough by comparing the return value and rlen
+ * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ */
+extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 #endif /*__s390x__*/
 
 #endif
-- 
cgit v1.2.3


From 66a464dbc8e0345b6f972b92bf1118e043d7c987 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Sat, 25 Jun 2005 14:55:33 -0700
Subject: [PATCH] s390: debug feature changes

This patch changes the memory allocation method for the s390 debug feature.
Trace buffers had been allocated using the get_free_pages() function before.
Therefore it was not possible to get big memory areas in a running system due
to memory fragmentation.  Now the trace buffers are subdivided into several
subbuffers with pagesize.  Therefore it is now possible to allocate more
memory for the trace buffers and more trace records can be written.

In addition to that, dynamic specification of the size of the trace buffers is
implemented.  It is now possible to change the size of a trace buffer using a
new debugfs file instance.  When writing a number into this file, the trace
buffer size is changed to 'number * pagesize'.

In the past all the traces could be obtained from userspace by accessing files
in the "proc" filesystem.  Now with debugfs we have a new filesystem which
should be used for debugging purposes.  This patch moves the debug feature
from procfs to debugfs.

Since the interface of debug_register() changed, all device drivers, which use
the debug feature had to be adjusted.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/debug.h | 48 +++++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-s390/debug.h b/include/asm-s390/debug.h
index 6bbcdea42a86..92360d90144b 100644
--- a/include/asm-s390/debug.h
+++ b/include/asm-s390/debug.h
@@ -9,6 +9,8 @@
 #ifndef DEBUG_H
 #define DEBUG_H
 
+#include <linux/config.h>
+#include <linux/fs.h>
 #include <linux/string.h>
 
 /* Note:
@@ -31,19 +33,18 @@ struct __debug_entry{
 } __attribute__((packed));
 
 
-#define __DEBUG_FEATURE_VERSION      1  /* version of debug feature */
+#define __DEBUG_FEATURE_VERSION      2  /* version of debug feature */
 
 #ifdef __KERNEL__
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/time.h>
-#include <linux/proc_fs.h>
 
 #define DEBUG_MAX_LEVEL            6  /* debug levels range from 0 to 6 */
 #define DEBUG_OFF_LEVEL            -1 /* level where debug is switched off */
 #define DEBUG_FLUSH_ALL            -1 /* parameter to flush all areas */
 #define DEBUG_MAX_VIEWS            10 /* max number of views in proc fs */
-#define DEBUG_MAX_PROCF_LEN        64 /* max length for a proc file name */
+#define DEBUG_MAX_NAME_LEN         64 /* max length for a debugfs file name */
 #define DEBUG_DEFAULT_LEVEL        3  /* initial debug level */
 
 #define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */
@@ -64,16 +65,17 @@ typedef struct debug_info {
 	spinlock_t lock;			
 	int level;
 	int nr_areas;
-	int page_order;
+	int pages_per_area;
 	int buf_size;
 	int entry_size;	
-	debug_entry_t** areas;
+	debug_entry_t*** areas;
 	int active_area;
-	int *active_entry;
-	struct proc_dir_entry* proc_root_entry;
-	struct proc_dir_entry* proc_entries[DEBUG_MAX_VIEWS];
+	int *active_pages;
+	int *active_entries;
+	struct dentry* debugfs_root_entry;
+	struct dentry* debugfs_entries[DEBUG_MAX_VIEWS];
 	struct debug_view* views[DEBUG_MAX_VIEWS];	
-	char name[DEBUG_MAX_PROCF_LEN];
+	char name[DEBUG_MAX_NAME_LEN];
 } debug_info_t;
 
 typedef int (debug_header_proc_t) (debug_info_t* id,
@@ -98,7 +100,7 @@ int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view,
 		         int area, debug_entry_t* entry, char* out_buf);						
 				
 struct debug_view {
-	char name[DEBUG_MAX_PROCF_LEN];
+	char name[DEBUG_MAX_NAME_LEN];
 	debug_prolog_proc_t* prolog_proc;
 	debug_header_proc_t* header_proc;
 	debug_format_proc_t* format_proc;
@@ -120,7 +122,7 @@ debug_entry_t* debug_exception_common(debug_info_t* id, int level,
 
 /* Debug Feature API: */
 
-debug_info_t* debug_register(char* name, int pages_index, int nr_areas,
+debug_info_t* debug_register(char* name, int pages, int nr_areas,
                              int buf_size);
 
 void debug_unregister(debug_info_t* id);
@@ -132,7 +134,8 @@ void debug_stop_all(void);
 extern inline debug_entry_t* 
 debug_event(debug_info_t* id, int level, void* data, int length)
 {
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_event_common(id,level,data,length);
 }
 
@@ -140,7 +143,8 @@ extern inline debug_entry_t*
 debug_int_event(debug_info_t* id, int level, unsigned int tag)
 {
         unsigned int t=tag;
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_event_common(id,level,&t,sizeof(unsigned int));
 }
 
@@ -148,14 +152,16 @@ extern inline debug_entry_t *
 debug_long_event (debug_info_t* id, int level, unsigned long tag)
 {
         unsigned long t=tag;
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_event_common(id,level,&t,sizeof(unsigned long));
 }
 
 extern inline debug_entry_t* 
 debug_text_event(debug_info_t* id, int level, const char* txt)
 {
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_event_common(id,level,txt,strlen(txt));
 }
 
@@ -167,7 +173,8 @@ debug_sprintf_event(debug_info_t* id,int level,char *string,...)
 extern inline debug_entry_t* 
 debug_exception(debug_info_t* id, int level, void* data, int length)
 {
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_exception_common(id,level,data,length);
 }
 
@@ -175,7 +182,8 @@ extern inline debug_entry_t*
 debug_int_exception(debug_info_t* id, int level, unsigned int tag)
 {
         unsigned int t=tag;
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_exception_common(id,level,&t,sizeof(unsigned int));
 }
 
@@ -183,14 +191,16 @@ extern inline debug_entry_t *
 debug_long_exception (debug_info_t* id, int level, unsigned long tag)
 {
         unsigned long t=tag;
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_exception_common(id,level,&t,sizeof(unsigned long));
 }
 
 extern inline debug_entry_t* 
 debug_text_exception(debug_info_t* id, int level, const char* txt)
 {
-	if ((!id) || (level > id->level)) return NULL;
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
         return debug_exception_common(id,level,txt,strlen(txt));
 }
 
-- 
cgit v1.2.3


From b2b18660066997420b716c1881a6be8b82700d97 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Sat, 25 Jun 2005 14:55:38 -0700
Subject: [PATCH] RCU: clean up a few remaining synchronize_kernel() calls

2.6.12-rc6-mm1 has a few remaining synchronize_kernel()s, some (but not
all) in comments.  This patch changes these synchronize_kernel() calls (and
comments) to synchronize_rcu() or synchronize_sched() as follows:

- arch/x86_64/kernel/mce.c mce_read(): change to synchronize_sched() to
  handle races with machine-check exceptions (synchronize_rcu() would not cut
  it given RCU implementations intended for hardcore realtime use).

- drivers/input/serio/i8042.c i8042_stop(): change to synchronize_sched() to
  handle races with i8042_interrupt() interrupt handler.  Again,
  synchronize_rcu() would not cut it given RCU implementations intended for
  hardcore realtime use.

- include/*/kdebug.h comments: change to synchronize_sched() to handle races
  with NMIs.  As before, synchronize_rcu() would not cut it...

- include/linux/list.h comment: change to synchronize_rcu(), since this
  comment is for list_del_rcu().

- security/keys/key.c unregister_key_type(): change to synchronize_rcu(),
  since this is interacting with RCU read side.

- security/keys/process_keys.c install_session_keyring(): change to
  synchronize_rcu(), since this is interacting with RCU read side.

Signed-off-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/kdebug.h    | 2 +-
 include/asm-ppc64/kdebug.h   | 2 +-
 include/asm-sparc64/kdebug.h | 2 +-
 include/asm-x86_64/kdebug.h  | 2 +-
 include/linux/list.h         | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/kdebug.h b/include/asm-i386/kdebug.h
index de6498b0d493..b3f8d5f59d5d 100644
--- a/include/asm-i386/kdebug.h
+++ b/include/asm-i386/kdebug.h
@@ -18,7 +18,7 @@ struct die_args {
 };
 
 /* Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_kernel - then free.
+   If you really want to do it first unregister - then synchronize_sched - then free.
   */
 int register_die_notifier(struct notifier_block *nb);
 extern struct notifier_block *i386die_chain;
diff --git a/include/asm-ppc64/kdebug.h b/include/asm-ppc64/kdebug.h
index 488634258a72..d383d161cf8d 100644
--- a/include/asm-ppc64/kdebug.h
+++ b/include/asm-ppc64/kdebug.h
@@ -17,7 +17,7 @@ struct die_args {
 
 /*
    Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_kernel -
+   If you really want to do it first unregister - then synchronize_sched -
    then free.
  */
 int register_die_notifier(struct notifier_block *nb);
diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h
index f70d3dad01f9..6321f5a0198d 100644
--- a/include/asm-sparc64/kdebug.h
+++ b/include/asm-sparc64/kdebug.h
@@ -16,7 +16,7 @@ struct die_args {
 };
 
 /* Note - you should never unregister because that can race with NMIs.
- * If you really want to do it first unregister - then synchronize_kernel
+ * If you really want to do it first unregister - then synchronize_sched
  * - then free.
  */
 int register_die_notifier(struct notifier_block *nb);
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index 6277f75cbb4b..b90341994d80 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -14,7 +14,7 @@ struct die_args {
 }; 
 
 /* Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_kernel - then free. 
+   If you really want to do it first unregister - then synchronize_sched - then free.
   */
 int register_die_notifier(struct notifier_block *nb);
 extern struct notifier_block *die_chain;
diff --git a/include/linux/list.h b/include/linux/list.h
index 399b51d17218..aab2db21b013 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -185,7 +185,7 @@ static inline void list_del(struct list_head *entry)
  * list_for_each_entry_rcu().
  *
  * Note that the caller is not permitted to immediately free
- * the newly deleted entry.  Instead, either synchronize_kernel()
+ * the newly deleted entry.  Instead, either synchronize_rcu()
  * or call_rcu() must be used to defer freeing until an RCU
  * grace period has elapsed.
  */
-- 
cgit v1.2.3


From 7897986bad8f6cd50d6149345aca7f6480f49464 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:13 -0700
Subject: [PATCH] sched: balance timers

Do CPU load averaging over a number of different intervals.  Allow each
interval to be chosen by sending a parameter to source_load and target_load.
0 is instantaneous, idx > 0 returns a decaying average with the most recent
sample weighted at 2^(idx-1), to a maximum of 3 (could be easily increased).

So generally a higher number will result in more conservative balancing.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/topology.h   | 4 ++++
 include/asm-x86_64/topology.h | 6 +++++-
 include/linux/sched.h         | 4 ++++
 include/linux/topology.h      | 8 ++++++++
 4 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index 6d0f67507b21..0055fbfeec7b 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -74,6 +74,10 @@ static inline int node_to_first_cpu(int node)
 	.imbalance_pct		= 125,			\
 	.cache_hot_time		= (10*1000000),		\
 	.cache_nice_tries	= 1,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
+	.wake_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 8f77e9f6bc23..fe8d80a15751 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -39,7 +39,11 @@ extern int __node_distance(int, int);
 	.busy_factor		= 32,			\
 	.imbalance_pct		= 125,			\
 	.cache_hot_time		= (10*1000000),		\
-	.cache_nice_tries	= 1,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 2,			\
+	.newidle_idx		= 1, 			\
+	.wake_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c69682b0444..664981ac1fb6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -488,6 +488,10 @@ struct sched_domain {
 	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
 	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
+	unsigned int busy_idx;
+	unsigned int idle_idx;
+	unsigned int newidle_idx;
+	unsigned int wake_idx;
 	int flags;			/* See SD_* */
 
 	/* Runtime fields. */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d70e8972c67f..ae9c2216dfa6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -89,6 +89,10 @@
 	.cache_hot_time		= 0,			\
 	.cache_nice_tries	= 0,			\
 	.per_cpu_gain		= 25,			\
+	.busy_idx		= 0,			\
+	.idle_idx		= 0,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
@@ -115,6 +119,10 @@
 	.cache_hot_time		= (5*1000000/2),	\
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
+	.busy_idx		= 2,			\
+	.idle_idx		= 0,			\
+	.newidle_idx		= 1,			\
+	.wake_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-- 
cgit v1.2.3


From cafb20c1f9976a70d633bb1e1c8c24eab00e4e80 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:17 -0700
Subject: [PATCH] sched: no aggressive idle balancing

Remove the very aggressive idle stuff that has recently gone into 2.6 - it is
going against the direction we are trying to go.  Hopefully we can regain
performance through other methods.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/topology.h   | 1 -
 include/asm-x86_64/topology.h | 1 -
 include/linux/topology.h      | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index 0055fbfeec7b..5eb6f61dcefc 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -82,7 +82,6 @@ static inline int node_to_first_cpu(int node)
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_NEWIDLE	\
-				| SD_WAKE_IDLE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index fe8d80a15751..9cb7459ce722 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -48,7 +48,6 @@ extern int __node_distance(int, int);
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-				| SD_WAKE_IDLE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/linux/topology.h b/include/linux/topology.h
index ae9c2216dfa6..b23ec64df7f1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -127,7 +127,6 @@
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
-- 
cgit v1.2.3


From 147cbb4bbe991452698f0772d8292f22825710ba Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:19 -0700
Subject: [PATCH] sched: balance on fork

Reimplement the balance on exec balancing to be sched-domains aware.  Use this
to also do balance on fork balancing.  Make x86_64 do balance on fork over the
NUMA domain.

The problem with the non sched-domains aware balancing became apparent on dual
core, multi socket opterons.  What we want is for the new tasks to be sent to
a different socket, but more often than not, we would first load up our
sibling core, or fill two cores of a single remote socket before selecting a
new one.

This gives large improvements to STREAM on such systems.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/topology.h |  2 ++
 include/linux/sched.h         | 10 ++++++----
 include/linux/topology.h      |  2 ++
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 9cb7459ce722..802d09b9c99f 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -44,9 +44,11 @@ extern int __node_distance(int, int);
 	.idle_idx		= 2,			\
 	.newidle_idx		= 1, 			\
 	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 664981ac1fb6..613491d3a875 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,10 +460,11 @@ enum idle_type
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
 #define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
 #define SD_BALANCE_EXEC		4	/* Balance on exec */
-#define SD_WAKE_IDLE		8	/* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE		16	/* Wake task to waking CPU */
-#define SD_WAKE_BALANCE		32	/* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER	64	/* Domain members share cpu power */
+#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
+#define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -492,6 +493,7 @@ struct sched_domain {
 	unsigned int idle_idx;
 	unsigned int newidle_idx;
 	unsigned int wake_idx;
+	unsigned int forkexec_idx;
 	int flags;			/* See SD_* */
 
 	/* Runtime fields. */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b23ec64df7f1..665597207def 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -93,6 +93,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 0,			\
 	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
@@ -123,6 +124,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 1,			\
 	.wake_idx		= 1,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-- 
cgit v1.2.3


From 68767a0ae428801649d510d9a65bb71feed44dd1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:20 -0700
Subject: [PATCH] sched: schedstats update for balance on fork

Add SCHEDSTAT statistics for sched-balance-fork.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 613491d3a875..36a10781c3f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -517,10 +517,16 @@ struct sched_domain {
 	unsigned long alb_failed;
 	unsigned long alb_pushed;
 
-	/* sched_balance_exec() stats */
-	unsigned long sbe_attempts;
+	/* SD_BALANCE_EXEC stats */
+	unsigned long sbe_cnt;
+	unsigned long sbe_balanced;
 	unsigned long sbe_pushed;
 
+	/* SD_BALANCE_FORK stats */
+	unsigned long sbf_cnt;
+	unsigned long sbf_balanced;
+	unsigned long sbf_pushed;
+
 	/* try_to_wake_up() stats */
 	unsigned long ttwu_wake_remote;
 	unsigned long ttwu_move_affine;
-- 
cgit v1.2.3


From 687f1661d302bc70ce906594a6d3f615ef075a50 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:21 -0700
Subject: [PATCH] sched: sched tuning

Do some basic initial tuning.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/topology.h   |  2 +-
 include/asm-ia64/topology.h   | 61 +++++++++++++++++++++++++++++++++----------
 include/asm-x86_64/topology.h |  3 +--
 include/linux/topology.h      | 11 ++++----
 4 files changed, 54 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index 5eb6f61dcefc..2461b731781e 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -81,7 +81,7 @@ static inline int node_to_first_cpu(int node)
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_FORK	\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 21cf351fd05c..4e64c2a6b369 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -42,25 +42,54 @@
 
 void build_cpu_to_node_map(void);
 
+#define SD_CPU_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000000),		\
+	.per_cpu_gain		= 100,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 2,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_WAKE_AFFINE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
 /* sched_domains SD_NODE_INIT for IA64 NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.groups			= NULL,			\
-	.min_interval		= 80,			\
-	.max_interval		= 320,			\
-	.busy_factor		= 320,			\
+	.min_interval		= 8,			\
+	.max_interval		= 8*(min(num_online_cpus(), 32)), \
+	.busy_factor		= 64,			\
 	.imbalance_pct		= 125,			\
 	.cache_hot_time		= (10*1000000),		\
-	.cache_nice_tries	= 1,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 2,			\
+	.newidle_idx		= 0, /* unused */	\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_WAKE_IDLE		\
+				| SD_BALANCE_FORK	\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
+	.balance_interval	= 64,			\
 	.nr_balance_failed	= 0,			\
 }
 
@@ -69,17 +98,21 @@ void build_cpu_to_node_map(void);
 	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.groups			= NULL,			\
-	.min_interval		= 80,			\
-	.max_interval		= 320,			\
-	.busy_factor		= 320,			\
-	.imbalance_pct		= 125,			\
+	.min_interval		= 64,			\
+	.max_interval		= 64*num_online_cpus(),	\
+	.busy_factor		= 128,			\
+	.imbalance_pct		= 133,			\
 	.cache_hot_time		= (10*1000000),		\
 	.cache_nice_tries	= 1,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 3,			\
+	.newidle_idx		= 0, /* unused */	\
+	.wake_idx		= 0, /* unused */	\
+	.forkexec_idx		= 0, /* unused */	\
 	.per_cpu_gain		= 100,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC,	\
+	.flags			= SD_LOAD_BALANCE,	\
 	.last_balance		= jiffies,		\
-	.balance_interval	= 100*(63+num_online_cpus())/64,   \
+	.balance_interval	= 64,			\
 	.nr_balance_failed	= 0,			\
 }
 
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 802d09b9c99f..c1bc3fad482e 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -42,12 +42,11 @@ extern int __node_distance(int, int);
 	.cache_nice_tries	= 2,			\
 	.busy_idx		= 3,			\
 	.idle_idx		= 2,			\
-	.newidle_idx		= 1, 			\
+	.newidle_idx		= 0, 			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_BALANCE,	\
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 665597207def..0320225e96da 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -91,7 +91,7 @@
 	.per_cpu_gain		= 25,			\
 	.busy_idx		= 0,			\
 	.idle_idx		= 0,			\
-	.newidle_idx		= 0,			\
+	.newidle_idx		= 1,			\
 	.wake_idx		= 0,			\
 	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
@@ -121,15 +121,14 @@
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
 	.busy_idx		= 2,			\
-	.idle_idx		= 0,			\
-	.newidle_idx		= 1,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
-	.forkexec_idx		= 0,			\
+	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE,	\
+				| SD_WAKE_AFFINE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
-- 
cgit v1.2.3


From 4866cde064afbb6c2a488c265e696879de616daa Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:23 -0700
Subject: [PATCH] sched: cleanup context switch locking

Instead of requiring architecture code to interact with the scheduler's
locking implementation, provide a couple of defines that can be used by the
architecture to request runqueue unlocked context switches, and ask for
interrupts to be enabled over the context switch.

Also replaces the "switch_lock" used by these architectures with an oncpu
flag (note, not a potentially slow bitflag).  This eliminates one bus
locked memory operation when context switching, and simplifies the
task_running function.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-arm/system.h     | 30 ++++--------------------------
 include/asm-ia64/system.h    | 10 +---------
 include/asm-mips/system.h    | 10 ++--------
 include/asm-s390/system.h    | 17 +++--------------
 include/asm-sparc/system.h   |  4 +---
 include/asm-sparc64/system.h | 14 ++++----------
 include/linux/init_task.h    |  1 -
 include/linux/sched.h        | 10 ++++++++--
 8 files changed, 23 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index 39dd7008013c..3d0d2860b6db 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -145,34 +145,12 @@ extern unsigned int user_debug;
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
 
-#ifdef CONFIG_SMP
 /*
- * Define our own context switch locking.  This allows us to enable
- * interrupts over the context switch, otherwise we end up with high
- * interrupt latency.  The real problem area is switch_mm() which may
- * do a full cache flush.
+ * switch_mm() may do a full cache flush over the context switch,
+ * so enable interrupts over the context switch to avoid high
+ * latency.
  */
-#define prepare_arch_switch(rq,next)					\
-do {									\
-	spin_lock(&(next)->switch_lock);				\
-	spin_unlock_irq(&(rq)->lock);					\
-} while (0)
-
-#define finish_arch_switch(rq,prev)					\
-	spin_unlock(&(prev)->switch_lock)
-
-#define task_running(rq,p)						\
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-#else
-/*
- * Our UP-case is more simple, but we assume knowledge of how
- * spin_unlock_irq() and friends are implemented.  This avoids
- * us needlessly decrementing and incrementing the preempt count.
- */
-#define prepare_arch_switch(rq,next)	local_irq_enable()
-#define finish_arch_switch(rq,prev)	spin_unlock(&(rq)->lock)
-#define task_running(rq,p)		((rq)->curr == (p))
-#endif
+#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
 
 /*
  * switch_to(prev, next) should switch from task `prev' to `next'
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 6f516e76d1f0..cd2cf76b2db1 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -183,8 +183,6 @@ do {								\
 
 #ifdef __KERNEL__
 
-#define prepare_to_switch()    do { } while(0)
-
 #ifdef CONFIG_IA32_SUPPORT
 # define IS_IA32_PROCESS(regs)	(ia64_psr(regs)->is != 0)
 #else
@@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task);
  * of that CPU which will not be released, because there we wait for the
  * tasklist_lock to become available.
  */
-#define prepare_arch_switch(rq, next)		\
-do {						\
-	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p) 		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
 
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 888fd8908467..169f3d4265b1 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file,
 extern int stop_a_enabled;
 
 /*
- * Taken from include/asm-ia64/system.h; prevents deadlock on SMP
+ * See include/asm-ia64/system.h; prevents deadlock on SMP
  * systems.
  */
-#define prepare_arch_switch(rq, next)		\
-do {						\
-	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p) 		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 
 #define arch_align_stack(x) (x)
 
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index e3cb3ce1d24a..b4a9f05a93d6 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs)
 	prev = __switch_to(prev,next);					     \
 } while (0)
 
-#define prepare_arch_switch(rq, next)	do { } while(0)
-#define task_running(rq, p)		((rq)->curr == (p))
-
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void account_user_vtime(struct task_struct *);
 extern void account_system_vtime(struct task_struct *);
-
-#define finish_arch_switch(rq, prev) do {				     \
-	set_fs(current->thread.mm_segment);				     \
-	spin_unlock(&(rq)->lock);					     \
-	account_system_vtime(prev);					     \
-	local_irq_enable();						     \
-} while (0)
-
 #else
+#define account_system_vtime(prev) do { } while (0)
+#endif
 
 #define finish_arch_switch(rq, prev) do {				     \
 	set_fs(current->thread.mm_segment);				     \
-	spin_unlock_irq(&(rq)->lock);					     \
+	account_system_vtime(prev);					     \
 } while (0)
 
-#endif
-
 #define nop() __asm__ __volatile__ ("nop")
 
 #define xchg(ptr,x) \
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 80cf20cfaee1..898562ebe94c 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
  * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
  * XXX WTF is the above comment? Found in late teen 2.4.x.
  */
-#define prepare_arch_switch(rq, next) do { \
+#define prepare_arch_switch(next) do { \
 	__asm__ __volatile__( \
 	".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
 	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
@@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 	"save %sp, -0x40, %sp\n\t" \
 	"restore; restore; restore; restore; restore; restore; restore"); \
 } while(0)
-#define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-#define task_running(rq, p)		((rq)->curr == (p))
 
 	/* Much care has gone into this code, do not touch it.
 	 *
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index fd12ca386f48..f9be2c5b4dc9 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -139,19 +139,13 @@ extern void __flushw_user(void);
 #define flush_user_windows flushw_user
 #define flush_register_windows flushw_all
 
-#define prepare_arch_switch(rq, next)		\
-do {	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
+/* Don't hold the runqueue lock over context switch */
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define prepare_arch_switch(next)		\
+do {						\
 	flushw_all();				\
 } while (0)
 
-#define finish_arch_switch(rq, prev)		\
-do {	spin_unlock_irq(&(prev)->switch_lock);	\
-} while (0)
-
-#define task_running(rq, p) \
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-
 	/* See what happens when you design the chip correctly?
 	 *
 	 * We tell gcc we clobber all non-fixed-usage registers except
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6a8c1a38d5e..03206a425d7a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
 	.blocked	= {{0}},					\
 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\
 	.proc_lock	= SPIN_LOCK_UNLOCKED,				\
-	.switch_lock	= SPIN_LOCK_UNLOCKED,				\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36a10781c3f3..d27be9337425 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -368,6 +368,11 @@ struct signal_struct {
 #endif
 };
 
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
+
 /*
  * Bits in flags field of signal_struct.
  */
@@ -594,6 +599,9 @@ struct task_struct {
 
 	int lock_depth;		/* BKL lock depth */
 
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	int oncpu;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
 	spinlock_t alloc_lock;
 /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
 	spinlock_t proc_lock;
-/* context-switch lock */
-	spinlock_t switch_lock;
 
 /* journalling filesystem info */
 	void *journal_info;
-- 
cgit v1.2.3


From 476d139c218e44e045e4bc6d4cc02b010b343939 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:29 -0700
Subject: [PATCH] sched: consolidate sbe sbf

Consolidate balance-on-exec with balance-on-fork.  This is made easy by the
sched-domains RCU patches.

As well as the general goodness of code reduction, this allows the runqueues
to be unlocked during balance-on-fork.

schedstats is a problem.  Maybe just have balance-on-event instead of
distinguishing fork and exec?

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d27be9337425..edb2c69a8873 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void FASTCALL(sched_fork(task_t * p));
+extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
 extern void FASTCALL(sched_exit(task_t * p));
 
 extern int in_group_p(gid_t);
-- 
cgit v1.2.3


From 1a20ff27ef75d866730ee796acd811a925af762f Mon Sep 17 00:00:00 2001
From: Dinakar Guniguntala <dino@in.ibm.com>
Date: Sat, 25 Jun 2005 14:57:33 -0700
Subject: [PATCH] Dynamic sched domains: sched changes

The following patches add dynamic sched domains functionality that was
extensively discussed on lkml and lse-tech.  I would like to see this added to
-mm

o The main advantage with this feature is that it ensures that the scheduler
  load balancing code only balances against the cpus that are in the sched
  domain as defined by an exclusive cpuset and not all of the cpus in the
  system. This removes any overhead due to load balancing code trying to
  pull tasks outside of the cpu exclusive cpuset only to be prevented by
  the tasks' cpus_allowed mask.
o cpu exclusive cpusets are useful for servers running orthogonal
  workloads such as RT applications requiring low latency and HPC
  applications that are throughput sensitive

o It provides a new API partition_sched_domains in sched.c
  that makes dynamic sched domains possible.
o cpu_exclusive cpusets are now associated with a sched domain.
  Which means that the users can dynamically modify the sched domains
  through the cpuset file system interface
o ia64 sched domain code has been updated to support this feature as well
o Currently, this does not support hotplug. (However some of my tests
  indicate hotplug+preempt is currently broken)
o I have tested it extensively on x86.
o This should have very minimal impact on performance as none of
  the fast paths are affected

Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Acked-by: Paul Jackson <pj@sgi.com>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Matthew Dobson <colpatch@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index edb2c69a8873..98c109e4f43d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -539,6 +539,8 @@ struct sched_domain {
 #endif
 };
 
+extern void partition_sched_domains(cpumask_t *partition1,
+				    cpumask_t *partition2);
 #ifdef ARCH_HAS_SCHED_DOMAIN
 /* Useful helpers that arch setup code may use. Defined in kernel/sched.c */
 extern cpumask_t cpu_isolated_map;
-- 
cgit v1.2.3


From f8cbd99bd3a023db8d6356d19a5f6f539d367327 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 25 Jun 2005 14:57:39 -0700
Subject: [PATCH] sched: voluntary kernel preemption

This patch adds a new preemption model: 'Voluntary Kernel Preemption'.  The
3 models can be selected from a new menu:

            (X) No Forced Preemption (Server)
            ( ) Voluntary Kernel Preemption (Desktop)
            ( ) Preemptible Kernel (Low-Latency Desktop)

we still default to the stock (Server) preemption model.

Voluntary preemption works by adding a cond_resched()
(reschedule-if-needed) call to every might_sleep() check.  It is lighter
than CONFIG_PREEMPT - at the cost of not having as tight latencies.  It
represents a different latency/complexity/overhead tradeoff.

It has no runtime impact at all if disabled.  Here are size stats that show
how the various preemption models impact the kernel's size:

    text    data     bss     dec     hex filename
 3618774  547184  179896 4345854  424ffe vmlinux.stock
 3626406  547184  179896 4353486  426dce vmlinux.voluntary   +0.2%
 3748414  548640  179896 4476950  445016 vmlinux.preempt     +3.5%

voluntary-preempt is +0.2% of .text, preempt is +3.5%.

This feature has been tested for many months by lots of people (and it's
also included in the RHEL4 distribution and earlier variants were in Fedora
as well), and it's intended for users and distributions who don't want to
use full-blown CONFIG_PREEMPT for one reason or another.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e25b97062ce1..687ba8c9973d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,15 +58,23 @@ struct completion;
  * be biten later when the calling function happens to sleep when it is not
  * supposed to.
  */
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+extern int cond_resched(void);
+# define might_resched() cond_resched()
+#else
+# define might_resched() do { } while (0)
+#endif
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-#define might_sleep() __might_sleep(__FILE__, __LINE__)
-#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
-void __might_sleep(char *file, int line);
+  void __might_sleep(char *file, int line);
+# define might_sleep() \
+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
 #else
-#define might_sleep() do {} while(0)
-#define might_sleep_if(cond) do {} while (0)
+# define might_sleep() do { might_resched(); } while (0)
 #endif
 
+#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
+
 #define abs(x) ({				\
 		int __x = (x);			\
 		(__x < 0) ? -__x : __x;		\
-- 
cgit v1.2.3


From 8f43d03fe2c4962c11d8227ac9505e590bad758b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:40 -0700
Subject: [PATCH] kexec: x86: rename APIC_MODE_EXINT

From: "Maciej W. Rozycki" <macro@linux-mips.org>

Rename APIC_MODE_EXINT to APIC_MODE_EXTINT - I think it should be named
after what the mode is called in documentation.

From: "Eric W. Biederman" <ebiederm@lnxi.com>

I have reduced this patch to just the name change in the header.  And
integrated the changes into the patches that add those
lines. Otherwise I ran into some ugly dependencies.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/apicdef.h   | 2 +-
 include/asm-x86_64/apicdef.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/apicdef.h b/include/asm-i386/apicdef.h
index c689554ad5b9..41e8d2d918e0 100644
--- a/include/asm-i386/apicdef.h
+++ b/include/asm-i386/apicdef.h
@@ -90,7 +90,7 @@
 #define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
 #define				APIC_MODE_FIXED		0x0
 #define				APIC_MODE_NMI		0x4
-#define				APIC_MODE_EXINT		0x7
+#define				APIC_MODE_EXTINT	0x7
 #define 	APIC_LVT1	0x360
 #define		APIC_LVTERR	0x370
 #define		APIC_TMICT	0x380
diff --git a/include/asm-x86_64/apicdef.h b/include/asm-x86_64/apicdef.h
index bfebdb690654..9388062c4f6e 100644
--- a/include/asm-x86_64/apicdef.h
+++ b/include/asm-x86_64/apicdef.h
@@ -94,7 +94,7 @@
 #define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
 #define				APIC_MODE_FIXED		0x0
 #define				APIC_MODE_NMI		0x4
-#define				APIC_MODE_EXINT		0x7
+#define				APIC_MODE_EXTINT	0x7
 #define 	APIC_LVT1	0x360
 #define		APIC_LVTERR	0x370
 #define		APIC_TMICT	0x380
-- 
cgit v1.2.3


From 9635b47d910223745258768418003580ef7dba17 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:41 -0700
Subject: [PATCH] kexec: x86: local apic fix

From: "Maciej W. Rozycki" <macro@linux-mips.org>

Fix a kexec problem which causes local APIC detection failure.

The problem is that detect_init_APIC() is called early, before the command
line has been processed.  Therefore "lapic" (and "nolapic") have not been
seen yet.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/apic.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index a5810cf7b578..53268cd9306e 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -5,6 +5,7 @@
 #include <linux/pm.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
+#include <asm/processor.h>
 #include <asm/system.h>
 
 #define Dprintk(x...)
@@ -16,8 +17,20 @@
 #define APIC_VERBOSE 1
 #define APIC_DEBUG   2
 
+extern int enable_local_apic;
 extern int apic_verbosity;
 
+static inline void lapic_disable(void)
+{
+	enable_local_apic = -1;
+	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+}
+
+static inline void lapic_enable(void)
+{
+	enable_local_apic = 1;
+}
+
 /*
  * Define the default level of output to be very little
  * This can be turned up by using apic=verbose for more
-- 
cgit v1.2.3


From 650927ef8ab1e9b05b77a3f32ca7adcedaae9306 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:44 -0700
Subject: [PATCH] kexec: x86: restore apic virtual wire mode on shutdown

When coming out of apic mode attempt to set the appropriate
apic back into virtual wire mode.  This improves on previous versions
of this patch by never setting both the local apic and the ioapic
into virtual wire mode.

This code looks at data from the mptable to see if an ioapic has
an ExtInt input to make this decision.  A future improvement
is to figure out which apic or ioapic was in virtual wire mode
at boot time and to remember it.  That is potentially a more accurate
method of selecting which apic to place in virtual wire mode.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/apic.h    | 2 +-
 include/asm-i386/apicdef.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 53268cd9306e..6a1b1882285c 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -100,7 +100,7 @@ extern void (*wait_timer_tick)(void);
 extern int get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (int virt_wire_setup);
 extern void disable_local_APIC (void);
 extern void lapic_shutdown (void);
 extern int verify_local_APIC (void);
diff --git a/include/asm-i386/apicdef.h b/include/asm-i386/apicdef.h
index 41e8d2d918e0..0fed5e3c699c 100644
--- a/include/asm-i386/apicdef.h
+++ b/include/asm-i386/apicdef.h
@@ -86,6 +86,7 @@
 #define			APIC_LVT_REMOTE_IRR		(1<<14)
 #define			APIC_INPUT_POLARITY		(1<<13)
 #define			APIC_SEND_PENDING		(1<<12)
+#define			APIC_MODE_MASK			0x700
 #define			GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
 #define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
 #define				APIC_MODE_FIXED		0x0
-- 
cgit v1.2.3


From 208fb93162d51faa69b9774fa7809858d84fd9dc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:45 -0700
Subject: [PATCH] kexec: x86_64: restore apic virtual wire mode on shutdown

When coming out of apic mode attempt to set the appropriate
apic back into virtual wire mode.  This improves on previous versions
of this patch by never setting both the local apic and the ioapic
into virtual wire mode.

This code looks at data from the mptable to see if an ioapic has
an ExtInt input to make this decision.  A future improvement
is to figure out which apic or ioapic was in virtual wire mode
at boot time and to remember it.  That is potentially a more accurate
method of selecting which apic to place in virtual wire mode.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/apic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index e4b1017b8b2b..16ec82e16b21 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -77,7 +77,7 @@ static inline void ack_APIC_irq(void)
 extern int get_maxlvt (void);
 extern void clear_local_APIC (void);
 extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (int virt_wire_setup);
 extern void disable_local_APIC (void);
 extern int verify_local_APIC (void);
 extern void cache_APIC_registers (void);
-- 
cgit v1.2.3


From 60bad7fadf59313a6359f8828bb0087884ad001a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:46 -0700
Subject: [PATCH] kexec: vmlinux: fix physical addresses

In vmlinux.lds.h the code is careful to define every section so vmlinux
properly reports the correct physical load address of code, as well as
its virtual address.

The new SECURITY_INIT definition fails to follow that convention and
causes an incorrect physical address to appear in the vmlinux if
there are any security initcalls.

This patch updates the SECURITY_INIT to follow the convention in the rest of
the file.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 99cef06a364a..b3bb326ae5b6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -73,7 +73,7 @@
 	}
 
 #define SECURITY_INIT							\
-	.security_initcall.init : {					\
+	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
 		*(.security_initcall.init) 				\
 		VMLINUX_SYMBOL(__security_initcall_end) = .;		\
-- 
cgit v1.2.3


From 3d345e3fc9e9177deb7c82e5c79e32d77eb63cce Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:49 -0700
Subject: [PATCH] kexec: x86: add CONFIG_PHYSICAL_START

For one kernel to report a crash another kernel has created we need
to have 2 kernels loaded simultaneously in memory.  To accomplish this
the two kernels need to be built to run at different physical addresses.

This patch adds the CONFIG_PHYSICAL_START option to the x86 kernel
so we can do just that.  You need to know what you are doing and
what the ramifications are before changing this value, and most users
won't care so I have made it depend on CONFIG_EMBEDDED.

bzImage kernels will work and run at a different address when compiled
with this option but they will still load at 1MB.  If you need a kernel
loaded at a different address as well you need to boot a vmlinux.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/page.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index dea8f8e6d86e..8d93f732d72d 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -126,9 +126,12 @@ extern int page_is_ram(unsigned long pagenr);
 
 #ifdef __ASSEMBLY__
 #define __PAGE_OFFSET		(0xC0000000)
+#define __PHYSICAL_START	CONFIG_PHYSICAL_START
 #else
 #define __PAGE_OFFSET		(0xC0000000UL)
+#define __PHYSICAL_START	((unsigned long)CONFIG_PHYSICAL_START)
 #endif
+#define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
-- 
cgit v1.2.3


From d0537508a9921efced238b20967e50e519ac34af Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:52 -0700
Subject: [PATCH] kexec: x86_64: add CONFIG_PHYSICAL_START

For one kernel to report a crash another kernel has created we need
to have 2 kernels loaded simultaneously in memory.  To accomplish this
the two kernels need to be built to run at different physical addresses.

This patch adds the CONFIG_PHYSICAL_START option to the x86_64 kernel
so we can do just that.  You need to know what you are doing and
what the ramifications are before changing this value, and most users
won't care so I have made it depend on CONFIG_EMBEDDED.

bzImage kernels will work and run at a different address when compiled
with this option but they will still load at 1MB.  If you need a kernel
loaded at a different address as well you need to boot a vmlinux.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/page.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 60130f4ca986..431318764af6 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -64,12 +64,14 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define __pgd(x) ((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-#define __START_KERNEL		0xffffffff80100000UL
+#define __PHYSICAL_START	((unsigned long)CONFIG_PHYSICAL_START)
+#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	0xffffffff80000000UL
 #define __PAGE_OFFSET           0xffff810000000000UL
 
 #else
-#define __START_KERNEL		0xffffffff80100000
+#define __PHYSICAL_START	CONFIG_PHYSICAL_START
+#define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	0xffffffff80000000
 #define __PAGE_OFFSET           0xffff810000000000
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.2.3


From dc009d92435f99498cbc579ce76bf28e837e2c14 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:52 -0700
Subject: [PATCH] kexec: add kexec syscalls

This patch introduces the architecture independent implementation of the
sys_kexec_load and compat_sys_kexec_load system calls.

Kexec on panic support has been integrated into the core patch and is
relatively clean.

In addition the hopefully architecture independent option
crashkernel=size@location has been documented.  Its purpose is to reserve
space for the panic kernel to live, and where no DMA transfer will ever be
set up to access.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h    | 127 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/reboot.h   |   3 ++
 include/linux/syscalls.h |   5 +-
 3 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/kexec.h

(limited to 'include')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
new file mode 100644
index 000000000000..e3fc35f4e35f
--- /dev/null
+++ b/include/linux/kexec.h
@@ -0,0 +1,127 @@
+#ifndef LINUX_KEXEC_H
+#define LINUX_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/linkage.h>
+#include <linux/compat.h>
+#include <asm/kexec.h>
+
+/* Verify architecture specific macros are defined */
+
+#ifndef KEXEC_SOURCE_MEMORY_LIMIT
+#error KEXEC_SOURCE_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_DESTINATION_MEMORY_LIMIT
+#error KEXEC_DESTINATION_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_MEMORY_LIMIT
+#error KEXEC_CONTROL_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_CODE_SIZE
+#error KEXEC_CONTROL_CODE_SIZE not defined
+#endif
+
+#ifndef KEXEC_ARCH
+#error KEXEC_ARCH not defined
+#endif
+
+/*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+ */
+
+typedef unsigned long kimage_entry_t;
+#define IND_DESTINATION  0x1
+#define IND_INDIRECTION  0x2
+#define IND_DONE         0x4
+#define IND_SOURCE       0x8
+
+#define KEXEC_SEGMENT_MAX 8
+struct kexec_segment {
+	void __user *buf;
+	size_t bufsz;
+	unsigned long mem;	/* User space sees this as a (void *) ... */
+	size_t memsz;
+};
+
+#ifdef CONFIG_COMPAT
+struct compat_kexec_segment {
+	compat_uptr_t buf;
+	compat_size_t bufsz;
+	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
+	compat_size_t memsz;
+};
+#endif
+
+struct kimage {
+	kimage_entry_t head;
+	kimage_entry_t *entry;
+	kimage_entry_t *last_entry;
+
+	unsigned long destination;
+
+	unsigned long start;
+	struct page *control_code_page;
+
+	unsigned long nr_segments;
+	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
+
+	struct list_head control_pages;
+	struct list_head dest_pages;
+	struct list_head unuseable_pages;
+
+	/* Address of next control page to allocate for crash kernels. */
+	unsigned long control_page;
+
+	/* Flags to indicate special processing */
+	unsigned int type : 1;
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH   1
+};
+
+
+
+/* kexec interface functions */
+extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
+extern int machine_kexec_prepare(struct kimage *image);
+extern void machine_kexec_cleanup(struct kimage *image);
+extern asmlinkage long sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct kexec_segment __user *segments,
+	unsigned long flags);
+#ifdef CONFIG_COMPAT
+extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct compat_kexec_segment __user *segments,
+	unsigned long flags);
+#endif
+extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
+extern void crash_kexec(void);
+extern struct kimage *kexec_image;
+
+#define KEXEC_ON_CRASH  0x00000001
+#define KEXEC_ARCH_MASK 0xffff0000
+
+/* These values match the ELF architecture values.
+ * Unless there is a good reason that should continue to be the case.
+ */
+#define KEXEC_ARCH_DEFAULT ( 0 << 16)
+#define KEXEC_ARCH_386     ( 3 << 16)
+#define KEXEC_ARCH_X86_64  (62 << 16)
+#define KEXEC_ARCH_PPC     (20 << 16)
+#define KEXEC_ARCH_PPC64   (21 << 16)
+#define KEXEC_ARCH_IA_64   (50 << 16)
+
+#define KEXEC_FLAGS    (KEXEC_ON_CRASH)  /* List of defined/legal kexec flags */
+
+/* Location of a reserved region to hold the crash kernel.
+ */
+extern struct resource crashk_res;
+
+#else /* !CONFIG_KEXEC */
+static inline void crash_kexec(void) { }
+#endif /* CONFIG_KEXEC */
+#endif /* LINUX_KEXEC_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index d60fafc8bdc5..c5a05e16edb2 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -51,6 +51,9 @@ extern void machine_restart(char *cmd);
 extern void machine_halt(void);
 extern void machine_power_off(void);
 
+extern void machine_shutdown(void);
+extern void machine_crash_shutdown(void);
+
 #endif
 
 #endif /* _LINUX_REBOOT_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c39f6f72cbbc..7ba8f8f747aa 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -159,8 +159,9 @@ asmlinkage long sys_shutdown(int, int);
 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
 				void __user *arg);
 asmlinkage long sys_restart_syscall(void);
-asmlinkage long sys_kexec_load(void *entry, unsigned long nr_segments,
-			struct kexec_segment *segments, unsigned long flags);
+asmlinkage long sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct kexec_segment __user *segments,
+	unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage void sys_exit_group(int error_code);
-- 
cgit v1.2.3


From 5033cba087f6ac773002123aafbea1aad4267682 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:56 -0700
Subject: [PATCH] kexec: x86 kexec core

This is the i386 implementation of kexec.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/kexec.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/asm-i386/kexec.h

(limited to 'include')

diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h
new file mode 100644
index 000000000000..a1599b55d62d
--- /dev/null
+++ b/include/asm-i386/kexec.h
@@ -0,0 +1,28 @@
+#ifndef _I386_KEXEC_H
+#define _I386_KEXEC_H
+
+#include <asm/fixmap.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ *
+ * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
+ * calculation for the amount of memory directly mappable into the
+ * kernel memory space.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE	4096
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_386
+
+#endif /* _I386_KEXEC_H */
-- 
cgit v1.2.3


From 5234f5eb04abbbfa306ccfbc2ccbb6e73f515b15 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:58:02 -0700
Subject: [PATCH] kexec: x86_64 kexec implementation

This is the x86_64 implementation of machine kexec.  32bit compatibility
support has been implemented, and machine_kexec has been enhanced to not care
about the changing internal kernel page table structures.

From: Alexander Nyberg <alexn@dsv.su.se>

      build fix

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/kexec.h  | 28 ++++++++++++++++++++++++++++
 include/asm-x86_64/unistd.h |  2 +-
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-x86_64/kexec.h

(limited to 'include')

diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
new file mode 100644
index 000000000000..dc33646dc7dd
--- /dev/null
+++ b/include/asm-x86_64/kexec.h
@@ -0,0 +1,28 @@
+#ifndef _X86_64_KEXEC_H
+#define _X86_64_KEXEC_H
+
+#include <asm/page.h>
+#include <asm/proto.h>
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ *
+ * So far x86_64 is limited to 40 physical address bits.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+/* Maximum address we can use for the control pages */
+#define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_CODE_SIZE  (4096UL + 4096UL)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_X86_64
+
+#endif /* _X86_64_KEXEC_H */
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 3c9af6fd4332..d767adcbf0ff 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -552,7 +552,7 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify)
 #define __NR_mq_getsetattr 	245
 __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
 #define __NR_kexec_load 	246
-__SYSCALL(__NR_kexec_load, sys_ni_syscall)
+__SYSCALL(__NR_kexec_load, sys_kexec_load)
 #define __NR_waitid		247
 __SYSCALL(__NR_waitid, sys_waitid)
 #define __NR_add_key		248
-- 
cgit v1.2.3


From 70765aa4bdb8862a49fcf5b28f3deaf561cf5ae7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:58:07 -0700
Subject: [PATCH] kexec: kexec ppc support

I have tweaked this patch slightly to handle an empty list
of pages to relocate passed to relocate_new_kernel, and
I have added ppc_md.machine_crash_shutdown to keep up with
the changes in the generic kexec infrastructure.

From: Albert Herranz <albert_herranz@yahoo.es>

The following patch adds support for kexec on the ppc32 platform.

Non-OpenFirmware based platforms are likely to work directly without
additional changes on the kernel side.  The kexec-tools userland package
may need to be slightly updated, though.

For OpenFirmware based machines, additional work is still needed on the
kernel side before kexec support is ready.  Benjamin Herrenschmidt is
kindly working on that part.

In order for a ppc platform to use the kexec kernel services it must
implement some ppc_md hooks.  Otherwise, kexec will be explicitly disabled,
as suggested by benh.

There are 3+1 new ppc_md hooks that a platform supporting kexec may
implement.  Two of them are mandatory for kexec to work.  See
include/asm-ppc/machdep.h for details.

- machine_kexec_prepare(image)

  This function is called to make any arrangements to the image before it
  is loaded.

  This hook _MUST_ be provided by a platform in order to activate kexec
  support for that platform.  Otherwise, the platform is considered to not
  support kexec and the kexec_load system call will fail (that makes all
  existing platforms by default non-kexec'able).

- machine_kexec_cleanup(image)

  This function is called to make any cleanups on image after the loaded
  image data is freed.  This hook is optional.  A platform may or may
  not provide this hook.

- machine_kexec(image)

  This function is called to perform the _actual_ kexec.  This hook
  _MUST_ be provided by a platform in order to activate kexec support for
  that platform.

  If a platform provides machine_kexec_prepare but forgets to provide
  machine_kexec, a kexec will fall back to a reboot.

  A ready-to-use machine_kexec_simple() generic function is provided to,
  hopefully, simplify kexec adoption for embedded platforms.  A platform
  may call this function from its specific machine_kexec hook, like this:

void myplatform_kexec(struct kimage *image)
{
        machine_kexec_simple(image);
}

- machine_shutdown()

  This function is called to perform any machine specific shutdowns, not
  already done by drivers.  This hook is optional.  A platform may or may
  not provide this hook.

An example (trimmed) platform specific module for a platform supporting
kexec through the existing machine_kexec_simple follows:

/* ... */

#ifdef CONFIG_KEXEC
int myplatform_kexec_prepare(struct kimage *image)
{
        /* here, we can place additional preparations
*/
        return 0; /* yes, we support kexec */
}

void myplatform_kexec(struct kimage *image)
{
        machine_kexec_simple(image);
}
#endif /* CONFIG_KEXEC */

/* ... */

void __init
platform_init(unsigned long r3, unsigned long r4,
unsigned long r5,
              unsigned long r6, unsigned long r7)
{

/* ... */

#ifdef CONFIG_KEXEC
        ppc_md.machine_kexec_prepare =
myplatform_kexec_prepare;
        ppc_md.machine_kexec         =
myplatform_kexec;
#endif /* CONFIG_KEXEC */

/* ... */

}

The kexec ppc kernel support has been heavily tested on the GameCube Linux
port, and, as reported in the fastboot mailing list, it has been tested too
on a Moto 82xx ppc by Rick Richardson.

Signed-off-by: Albert Herranz <albert_herranz@yahoo.es>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc/kexec.h   | 38 ++++++++++++++++++++++++++++++++++++++
 include/asm-ppc/machdep.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 include/asm-ppc/kexec.h

(limited to 'include')

diff --git a/include/asm-ppc/kexec.h b/include/asm-ppc/kexec.h
new file mode 100644
index 000000000000..73191310d8db
--- /dev/null
+++ b/include/asm-ppc/kexec.h
@@ -0,0 +1,38 @@
+#ifndef _PPC_KEXEC_H
+#define _PPC_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ *
+ * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
+ * calculation for the amount of memory directly mappable into the
+ * kernel memory space.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE	4096
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_PPC
+
+#ifndef __ASSEMBLY__
+
+struct kimage;
+
+extern void machine_kexec_simple(struct kimage *image);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _PPC_KEXEC_H */
diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h
index b78d40870c95..1d4ab70a56f3 100644
--- a/include/asm-ppc/machdep.h
+++ b/include/asm-ppc/machdep.h
@@ -4,6 +4,7 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/kexec.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
@@ -114,6 +115,36 @@ struct machdep_calls {
 	/* functions for dealing with other cpus */
 	struct smp_ops_t *smp_ops;
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_KEXEC
+	/* Called to shutdown machine specific hardware not already controlled
+	 * by other drivers.
+	 * XXX Should we move this one out of kexec scope?
+	 */
+	void (*machine_shutdown)(void);
+
+	/* Called to do the minimal shutdown needed for a kexec'd kernel
+	 * to run successfully.
+	 * XXX Should we move this one out of kexec scope?
+	 */
+	void (*machine_crash_shutdown)(void);
+
+	/* Called to do whatever setup is needed on image and the
+	 * reboot code buffer. Returns 0 on success.
+	 * Provide your own (maybe dummy) implementation if your platform
+	 * claims to support kexec.
+	 */
+	int (*machine_kexec_prepare)(struct kimage *image);
+
+	/* Called to handle any machine specific cleanup on image */
+	void (*machine_kexec_cleanup)(struct kimage *image);
+
+	/* Called to perform the _real_ kexec.
+	 * Do NOT allocate memory or fail here. We are past the point of
+	 * no return.
+	 */
+	void (*machine_kexec)(struct kimage *image);
+#endif /* CONFIG_KEXEC */
 };
 
 extern struct machdep_calls ppc_md;
-- 
cgit v1.2.3


From f4c82d5132b0592f5d6befc5b652cbd4b08f12ff Mon Sep 17 00:00:00 2001
From: R Sharada <sharada@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:08 -0700
Subject: [PATCH] ppc64 kexec: native hash clear

Add code to clear the hash table and invalidate the tlb for native (SMP,
non-LPAR) mode.  Supports 16M and 4k pages.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: R Sharada <sharada@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc64/mmu.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index 9d03a98a4fa3..f373de5e3dd9 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -181,6 +181,28 @@ static inline void tlbiel(unsigned long va)
 	asm volatile("ptesync": : :"memory");
 }
 
+static inline unsigned long slot2va(unsigned long avpn, unsigned long large,
+		unsigned long secondary, unsigned long slot)
+{
+	unsigned long va;
+
+	va = avpn << 23;
+
+	if (!large) {
+		unsigned long vpi, pteg;
+
+		pteg = slot / HPTES_PER_GROUP;
+		if (secondary)
+			pteg = ~pteg;
+
+		vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+		va |= vpi << PAGE_SHIFT;
+	}
+
+	return va;
+}
+
 /*
  * Handle a fault by adding an HPTE. If the address can't be determined
  * to be valid via Linux page tables, return 1. If handled return 0
-- 
cgit v1.2.3


From fce0d5740322b98b863f9e609f5a9bd4c06703af Mon Sep 17 00:00:00 2001
From: R Sharada <sharada@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:10 -0700
Subject: [PATCH] ppc64: kexec support for ppc64

This patch implements the kexec support for ppc64 platforms.

A couple of notes:

1)  We copy the pages in virtual mode, using the full base kernel
    and a statically allocated stack.   At kexec_prepare time we
    scan the pages and if any overlap our (0, _end[]) range we
    return -ETXTBSY.

    On PowerPC 64 systems running in LPAR (logical partitioning)
    mode, only a small region of memory, referred to as the RMO,
    can be accessed in real mode.  Since Linux runs with only one
    zone of memory in the memory allocator, and it can be orders of
    magnitude more memory than the RMO, looping until we allocate
    pages in the source region is not feasible.  Copying in virtual
    means we don't have to write a hash table generation and call
    hypervisor to insert translations, instead we rely on the pinned
    kernel linear mapping.  The kernel already has move to linked
    location built in, so there is no requirement to load it at 0.

    If we want to load something other than a kernel, then a stub
    can be written to copy a linear chunk in real mode.

2)  The start entry point gets passed parameters from the kernel.
    Slaves are started at a fixed address after copying code from
    the entry point.

    All CPUs get passed their firmware assigned physical id in r3
    (most calling conventions use this register for the first
    argument).

    This is used to distinguish each CPU from all other CPUs.
    Since firmware is not around, there is no other way to obtain
    this information other than to pass it somewhere.

    A single CPU, referred to here as the master and the one executing
    the kexec call, branches to start with the address of start in r4.
    While this can be calculated, we have to load it through a gpr to
    branch to this point so defining the register this is contained
    in is free.  A stack of unspecified size is available at r1
    (also common calling convention).

    All remaining running CPUs are sent to start at absolute address
    0x60 after copying the first 0x100 bytes from start to address 0.
    This convention was chosen because it matches what the kernel
    has been doing itself.  (only gpr3 is defined).

    Note: This is not quite the convention of the kexec bootblock v2
    in the kernel.  A stub has been written to convert between them,
    and we may adjust the kernel in the future to allow this directly
    without any stub.

3)  Destination pages can be placed anywhere, even where they
    would not be accessible in real mode.  This will allow us to
    place ram disks above the RMO if we choose.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: R Sharada <sharada@in.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc64/kexec.h   | 41 +++++++++++++++++++++++++++++++++++++++++
 include/asm-ppc64/machdep.h |  1 +
 include/asm-ppc64/xics.h    |  1 +
 3 files changed, 43 insertions(+)
 create mode 100644 include/asm-ppc64/kexec.h

(limited to 'include')

diff --git a/include/asm-ppc64/kexec.h b/include/asm-ppc64/kexec.h
new file mode 100644
index 000000000000..511908afaeeb
--- /dev/null
+++ b/include/asm-ppc64/kexec.h
@@ -0,0 +1,41 @@
+#ifndef _PPC64_KEXEC_H
+#define _PPC64_KEXEC_H
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+/* XXX: since we copy virt we can use any page we allocate */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+/* XXX: I want to allow initrd in highmem.  otherwise set to rmo on lpar */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+/* XXX: unused today, ppc32 uses TASK_SIZE, probably left over from use_mm  */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* XXX: today we don't use this at all, although we have a static stack */
+#define KEXEC_CONTROL_CODE_SIZE 4096
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_PPC64
+
+#define MAX_NOTE_BYTES 1024
+
+#ifndef __ASSEMBLY__
+
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
+extern void kexec_smp_wait(void);	/* get and clear naca physid, wait for
+					  master to copy new code to 0 */
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PPC64_KEXEC_H */
+
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h
index 553b2ea23bed..9cdad3ed1526 100644
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -86,6 +86,7 @@ struct machdep_calls {
 
 	void		(*init_IRQ)(void);
 	int		(*get_irq)(struct pt_regs *);
+	void		(*cpu_irq_down)(void);
 
 	/* PCI stuff */
 	void		(*pcibios_fixup)(void);
diff --git a/include/asm-ppc64/xics.h b/include/asm-ppc64/xics.h
index fdec5e7a7af6..0c45e14e26ca 100644
--- a/include/asm-ppc64/xics.h
+++ b/include/asm-ppc64/xics.h
@@ -17,6 +17,7 @@
 void xics_init_IRQ(void);
 int xics_get_irq(struct pt_regs *);
 void xics_setup_cpu(void);
+void xics_teardown_cpu(void);
 void xics_cause_IPI(int cpu);
 void xics_request_IPIs(void);
 void xics_migrate_irqs_away(void);
-- 
cgit v1.2.3


From cf13f0eaffa31bf6a145c53c589654b11c72ddc7 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 25 Jun 2005 14:58:11 -0700
Subject: [PATCH] kexec: s390 support

Add kexec support for s390 architecture.

From: Milton Miller <miltonm@bga.com>

- Fix passing of first argument to relocate_kernel assembly.
- Fix Kconfig description.
- Remove wrong comment and comments that describe obvious things.
- Allow only KEXEC_TYPE_DEFAULT as image type -> dump not supported.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/kexec.h  | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-s390/unistd.h |  2 +-
 include/linux/kexec.h     |  1 +
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-s390/kexec.h

(limited to 'include')

diff --git a/include/asm-s390/kexec.h b/include/asm-s390/kexec.h
new file mode 100644
index 000000000000..54cf7d9f251c
--- /dev/null
+++ b/include/asm-s390/kexec.h
@@ -0,0 +1,42 @@
+/*
+ * include/asm-s390/kexec.h
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+#ifndef _S390_KEXEC_H
+#define _S390_KEXEC_H
+
+#include <asm/page.h>
+#include <asm/processor.h>
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control pages */
+/* Not more than 2GB */
+#define KEXEC_CONTROL_MEMORY_LIMIT (1<<31)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_CODE_SIZE 4096
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_S390
+
+#define MAX_NOTE_BYTES 1024
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
+#endif /*_S390_KEXEC_H */
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index f1a204f7c0f0..363db45f8d07 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -269,7 +269,7 @@
 #define __NR_mq_timedreceive	274
 #define __NR_mq_notify		275
 #define __NR_mq_getsetattr	276
-/* Number 277 is reserved for new sys_kexec_load */
+#define __NR_kexec_load		277
 #define __NR_add_key		278
 #define __NR_request_key	279
 #define __NR_keyctl		280
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index e3fc35f4e35f..0653a27c3d72 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -114,6 +114,7 @@ extern struct kimage *kexec_image;
 #define KEXEC_ARCH_PPC     (20 << 16)
 #define KEXEC_ARCH_PPC64   (21 << 16)
 #define KEXEC_ARCH_IA_64   (50 << 16)
+#define KEXEC_ARCH_S390    (22 << 16)
 
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH)  /* List of defined/legal kexec flags */
 
-- 
cgit v1.2.3


From 625f1c8219d95300ed32e4c67eb62a50ded095ba Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:12 -0700
Subject: [PATCH] Kdump: Export crash notes section address through sysfs

o Following patch exports kexec global variable "crash_notes" to user space
  through sysfs as kernel attribute in /sys/kernel.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/kexec.h   | 5 +++++
 include/asm-x86_64/kexec.h | 5 +++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h
index a1599b55d62d..6ed2a03e37b3 100644
--- a/include/asm-i386/kexec.h
+++ b/include/asm-i386/kexec.h
@@ -25,4 +25,9 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_386
 
+#define MAX_NOTE_BYTES 1024
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
 #endif /* _I386_KEXEC_H */
diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
index dc33646dc7dd..42d2ff15c592 100644
--- a/include/asm-x86_64/kexec.h
+++ b/include/asm-x86_64/kexec.h
@@ -25,4 +25,9 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_X86_64
 
+#define MAX_NOTE_BYTES 1024
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
 #endif /* _X86_64_KEXEC_H */
-- 
cgit v1.2.3


From 92aa63a5a1bf2e7b0c79e6716d24b76dbbdcf951 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:18 -0700
Subject: [PATCH] kdump: Retrieve saved max pfn

This patch retrieves the max_pfn being used by previous kernel and stores it
in a safe location (saved_max_pfn) before it is overwritten due to user
defined memory map.  This pfn is used to make sure that user does not try to
read the physical memory beyond saved_max_pfn.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 500f451ce0c0..82bd8842d11c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -22,6 +22,10 @@ extern unsigned long min_low_pfn;
  */
 extern unsigned long max_pfn;
 
+#ifdef CONFIG_CRASH_DUMP
+extern unsigned long saved_max_pfn;
+#endif
+
 /*
  * node_bootmem_map is a map pointer - the bits represent all physical 
  * memory pages (including holes) on the node.
-- 
cgit v1.2.3


From 60e64d46a58236e3c718074372cab6a5b56a3b15 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:19 -0700
Subject: [PATCH] kdump: Routines for copying dump pages

This patch provides the interfaces necessary to read the dump contents,
treating it as a high memory device.

Signed off by Hariprasad Nellitheertha <hari@in.ibm.com>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/highmem.h |  1 +
 include/linux/crash_dump.h | 13 +++++++++++++
 include/linux/highmem.h    |  1 +
 3 files changed, 15 insertions(+)
 create mode 100644 include/linux/crash_dump.h

(limited to 'include')

diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
index 1df42bf347df..0fd331306b60 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -70,6 +70,7 @@ void *kmap(struct page *page);
 void kunmap(struct page *page);
 void *kmap_atomic(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 struct page *kmap_atomic_to_page(void *ptr);
 
 #define flush_cache_kmaps()	do { } while (0)
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
new file mode 100644
index 000000000000..7d983b817429
--- /dev/null
+++ b/include/linux/crash_dump.h
@@ -0,0 +1,13 @@
+#ifndef LINUX_CRASH_DUMP_H
+#define LINUX_CRASH_DUMP_H
+
+#ifdef CONFIG_CRASH_DUMP
+#include <linux/kexec.h>
+#include <linux/smp_lock.h>
+#include <linux/device.h>
+#include <linux/proc_fs.h>
+
+extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
+						unsigned long, int);
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 2a7e6c65c882..6bece9280eb7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -28,6 +28,7 @@ static inline void *kmap(struct page *page)
 
 #define kmap_atomic(page, idx)		page_address(page)
 #define kunmap_atomic(addr, idx)	do { } while (0)
+#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
 
 #endif /* CONFIG_HIGHMEM */
-- 
cgit v1.2.3


From 2030eae52b416a9a9f0ffda74c982b7f1e19496d Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:20 -0700
Subject: [PATCH] Retrieve elfcorehdr address from command line

This patch adds support for retrieving the address of elf core header if one
is passed in command line.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/crash_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7d983b817429..3f25fd1eaa4b 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/proc_fs.h>
 
+extern unsigned long long elfcorehdr_addr;
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
 #endif /* CONFIG_CRASH_DUMP */
-- 
cgit v1.2.3


From 666bfddbe8b8fd4fd44617d6c55193d5ac7edb29 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:21 -0700
Subject: [PATCH] kdump: Access dump file in elf format (/proc/vmcore)

From: "Vivek Goyal" <vgoyal@in.ibm.com>

o Support for /proc/vmcore interface. This interface exports elf core image
  either in ELF32 or ELF64 format, depending on the format in which elf headers
  have been stored by crashed kernel.
o Added support for CONFIG_VMCORE config option.
o Removed the dependency on /proc/kcore.

From: "Eric W. Biederman" <ebiederm@xmission.com>

This patch has been refactored to more closely match the prevailing style in
the affected files.  And to clearly indicate the dependency between
/proc/kcore and proc/vmcore.c

From: Hariprasad Nellitheertha <hari@in.ibm.com>

This patch contains the code that provides an ELF format interface to the
previous kernel's memory post kexec reboot.

Signed off by Hariprasad Nellitheertha <hari@in.ibm.com>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/crash_dump.h | 4 ++++
 include/linux/proc_fs.h    | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 3f25fd1eaa4b..534d750d922d 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -7,8 +7,12 @@
 #include <linux/device.h>
 #include <linux/proc_fs.h>
 
+#define ELFCORE_ADDR_MAX	(-1ULL)
 extern unsigned long long elfcorehdr_addr;
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
+extern struct file_operations proc_vmcore_operations;
+extern struct proc_dir_entry *proc_vmcore;
+
 #endif /* CONFIG_CRASH_DUMP */
 #endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 59e505261fd6..0563581e3a02 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -74,6 +74,13 @@ struct kcore_list {
 	size_t size;
 };
 
+struct vmcore {
+	struct list_head list;
+	unsigned long long paddr;
+	unsigned long size;
+	loff_t offset;
+};
+
 #ifdef CONFIG_PROC_FS
 
 extern struct proc_dir_entry proc_root;
-- 
cgit v1.2.3


From 6e274d144302068a00794ec22e73520c0615cb6f Mon Sep 17 00:00:00 2001
From: Alexander Nyberg <alexn@telia.com>
Date: Sat, 25 Jun 2005 14:58:26 -0700
Subject: [PATCH] kdump: Use real pt_regs from exception

Makes crash_kexec() take a pt_regs * as an argument.  This makes it possible
to capture the exact register state at the point of the crash.  If we come
from a direct panic assertion, NULL is passed and the current registers are
saved before the crash dump.

This hooks into two places:
die(): check the conditions under which we will panic when calling
do_exit and go there directly with the pt_regs that caused the fatal
fault.

die_nmi(): If we receive an NMI lockup while in the kernel use the
pt_regs and go directly to crash_kexec(). We're probably nested up badly
at this point so this might be the only chance to escape with proper
information.

Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h  | 8 ++++++--
 include/linux/reboot.h | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0653a27c3d72..7383173a3a9c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -99,7 +99,8 @@ extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
 	unsigned long flags);
 #endif
 extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
-extern void crash_kexec(void);
+extern void crash_kexec(struct pt_regs *);
+int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
 
 #define KEXEC_ON_CRASH  0x00000001
@@ -123,6 +124,9 @@ extern struct kimage *kexec_image;
 extern struct resource crashk_res;
 
 #else /* !CONFIG_KEXEC */
-static inline void crash_kexec(void) { }
+struct pt_regs;
+struct task_struct;
+static inline void crash_kexec(struct pt_regs *regs) { }
+static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #endif /* CONFIG_KEXEC */
 #endif /* LINUX_KEXEC_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index c5a05e16edb2..2d4dd23168dd 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -52,7 +52,8 @@ extern void machine_halt(void);
 extern void machine_power_off(void);
 
 extern void machine_shutdown(void);
-extern void machine_crash_shutdown(void);
+struct pt_regs;
+extern void machine_crash_shutdown(struct pt_regs *);
 
 #endif
 
-- 
cgit v1.2.3


From 72414d3f1d22fc3e311b162fca95c430048d38ce Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:28 -0700
Subject: [PATCH] kexec code cleanup

o The following patch makes purely cosmetic changes, fixing CodingStyle
  guideline violations such as the following in kexec-related files:

  o braces for one line "if" statements, "for" loops,
  o more than 80 column wide lines,
  o No space after "while", "for" and "switch" key words

o Changes:
  o take-2: Removed the extra tab before "case" key words.
  o take-3: Put operator at the end of line and space before "*/"

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h    | 13 ++++++++-----
 include/linux/syscalls.h |  6 +++---
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 7383173a3a9c..c8468472aec0 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -91,14 +91,17 @@ extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
 extern int machine_kexec_prepare(struct kimage *image);
 extern void machine_kexec_cleanup(struct kimage *image);
 extern asmlinkage long sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments,
-	unsigned long flags);
+					unsigned long nr_segments,
+					struct kexec_segment __user *segments,
+					unsigned long flags);
 #ifdef CONFIG_COMPAT
 extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct compat_kexec_segment __user *segments,
-	unsigned long flags);
+				unsigned long nr_segments,
+				struct compat_kexec_segment __user *segments,
+				unsigned long flags);
 #endif
-extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
+extern struct page *kimage_alloc_control_pages(struct kimage *image,
+						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7ba8f8f747aa..52830b6d94e5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -159,9 +159,9 @@ asmlinkage long sys_shutdown(int, int);
 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
 				void __user *arg);
 asmlinkage long sys_restart_syscall(void);
-asmlinkage long sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments,
-	unsigned long flags);
+asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
+				struct kexec_segment __user *segments,
+				unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage void sys_exit_group(int error_code);
-- 
cgit v1.2.3


From 8c0e33c133021ee241e9d51255b9fb18eb34ef0e Mon Sep 17 00:00:00 2001
From: Nick Wilson <njw@osdl.org>
Date: Sat, 25 Jun 2005 14:59:00 -0700
Subject: [PATCH] Use ALIGN to remove duplicate code

This patch makes use of ALIGN() to remove duplicate round-up code.

Signed-off-by: Nick Wilson <njw@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/a.out.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index af8a1dfa5c32..f913cc3e1b0d 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -138,7 +138,7 @@ enum machine_type {
 #endif
 #endif
 
-#define _N_SEGMENT_ROUND(x) (((x) + SEGMENT_SIZE - 1) & ~(SEGMENT_SIZE - 1))
+#define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
 
 #define _N_TXTENDADDR(x) (N_TXTADDR(x)+(x).a_text)
 
-- 
cgit v1.2.3


From 681ea4b930768444e9d88651c1362b0bf6d2a42b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:04 -0700
Subject: [PATCH] drivers/char/nvram.c: possible cleanups

This patch contains the following possible cleanups:
- make the needlessly global function __nvram_set_checksum static
- #if 0 the unused global function nvram_set_checksum
- remove the EXPORT_SYMBOL's for both functions

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nvram.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index b031e41b5e0d..9189829c131c 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -20,8 +20,6 @@ extern void __nvram_write_byte(unsigned char c, int i);
 extern void nvram_write_byte(unsigned char c, int i);
 extern int __nvram_check_checksum(void);
 extern int nvram_check_checksum(void);
-extern void __nvram_set_checksum(void);
-extern void nvram_set_checksum(void);
 #endif
 
 #endif  /* _LINUX_NVRAM_H */
-- 
cgit v1.2.3


From 93d17d3d84b7147e8f07aeeb15ec01aa92c6b564 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:10 -0700
Subject: [PATCH] drivers/block/ll_rw_blk.c: cleanups

This patch contains the following cleanups:
- make needlessly global code static
- remove the following unused global functions:
  - blkdev_scsi_issue_flush_fn
  - __blk_attempt_remerge
- remove the following unused EXPORT_SYMBOL's:
  - blk_phys_contig_segment
  - blk_hw_contig_segment
  - blkdev_scsi_issue_flush_fn
  - __blk_attempt_remerge

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/blkdev.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60272141ff19..b54a0348a890 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -539,15 +539,12 @@ extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void blk_end_sync_rq(struct request *rq);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
-extern void __blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, int);
 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
 extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
-extern int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
-extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
@@ -631,7 +628,6 @@ extern void blk_queue_dma_alignment(request_queue_t *, int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern void blk_queue_ordered(request_queue_t *, int);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
-extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *);
 extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
 extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
 extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
@@ -675,8 +671,6 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-extern void drive_stat_acct(struct request *, int, int);
-
 static inline int queue_hardsect_size(request_queue_t *q)
 {
 	int retval = 512;
-- 
cgit v1.2.3


From 3e1d1d28d99dabe63c64f7f40f1ca1d646de1f73 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Fri, 24 Jun 2005 23:13:50 -0700
Subject: [PATCH] Cleanup patch for process freezing

1. Establish a simple API for process freezing defined in include/linux/sched.h:

   frozen(process)		Check for frozen process
   freezing(process)		Check if a process is being frozen
   freeze(process)		Tell a process to freeze (go to refrigerator)
   thaw_process(process)	Restart process
   frozen_process(process)	Process is frozen now

2. Remove all references to PF_FREEZE and PF_FROZEN from all
   kernel sources except sched.h

3. Fix numerous locations where try_to_freeze is manually done by a driver

4. Remove the argument that is no longer necessary from two function calls.

5. Some whitespace cleanup

6. Clear potential race in refrigerator (provides an open window of PF_FREEZE
   cleared before setting PF_FROZEN, recalc_sigpending does not check
   PF_FROZEN).

This patch does not address the problem of freeze_processes() violating the rule
that a task may only modify its own flags by setting PF_FREEZE. This is not clean
in an SMP environment. freeze(process) is therefore not SMP safe!

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 73 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c69682b0444..e7fd09b0557f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1245,33 +1245,78 @@ extern void normalize_rt_tasks(void);
 
 #endif
 
-/* try_to_freeze
- *
- * Checks whether we need to enter the refrigerator
- * and returns 1 if we did so.
- */
 #ifdef CONFIG_PM
-extern void refrigerator(unsigned long);
+/*
+ * Check if a process has been frozen
+ */
+static inline int frozen(struct task_struct *p)
+{
+	return p->flags & PF_FROZEN;
+}
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return p->flags & PF_FREEZE;
+}
+
+/*
+ * Request that a process be frozen
+ * FIXME: SMP problem. We may not modify other process' flags!
+ */
+static inline void freeze(struct task_struct *p)
+{
+	p->flags |= PF_FREEZE;
+}
+
+/*
+ * Wake up a frozen process
+ */
+static inline int thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		wake_up_process(p);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * freezing is complete, mark process as frozen
+ */
+static inline void frozen_process(struct task_struct *p)
+{
+	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+}
+
+extern void refrigerator(void);
 extern int freeze_processes(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(unsigned long refrigerator_flags)
+static inline int try_to_freeze(void)
 {
-	if (unlikely(current->flags & PF_FREEZE)) {
-		refrigerator(refrigerator_flags);
+	if (freezing(current)) {
+		refrigerator();
 		return 1;
 	} else
 		return 0;
 }
 #else
-static inline void refrigerator(unsigned long flag) {}
+static inline int frozen(struct task_struct *p) { return 0; }
+static inline int freezing(struct task_struct *p) { return 0; }
+static inline void freeze(struct task_struct *p) { BUG(); }
+static inline int thaw_process(struct task_struct *p) { return 1; }
+static inline void frozen_process(struct task_struct *p) { BUG(); }
+
+static inline void refrigerator(void) {}
 static inline int freeze_processes(void) { BUG(); return 0; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(unsigned long refrigerator_flags)
-{
-	return 0;
-}
+static inline int try_to_freeze(void) { return 0; }
+
 #endif /* CONFIG_PM */
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3