From c19c03fc749147f565e807fa65f1729066800571 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 4 Jun 2007 15:15:35 +1000
Subject: [POWERPC] unmap_vm_area becomes unmap_kernel_range for the public

This makes unmap_vm_area static and a wrapper around a new
exported unmap_kernel_range that takes an explicit range instead
of a vm_area struct.

This makes it more versatile for code that wants to play with kernel
page tables outside of the standard vmalloc area.

(One example is some rework of the PowerPC PCI IO space mapping
code that depends on that patch and removes some code duplication
and horrible abuse of forged struct vm_struct).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/vmalloc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b7ee83787c1..132b260aef1e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -65,9 +65,10 @@ extern struct vm_struct *get_vm_area_node(unsigned long size,
 					  unsigned long flags, int node,
 					  gfp_t gfp_mask);
 extern struct vm_struct *remove_vm_area(void *addr);
+
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
-extern void unmap_vm_area(struct vm_struct *area);
+extern void unmap_kernel_range(unsigned long addr, unsigned long size);
 
 /*
  *	Internals.  Dont't use..
-- 
cgit v1.2.3


From d7ad2254fa7cc11aec3faeba076c1243f6adeb47 Mon Sep 17 00:00:00 2001
From: John Keller <jpk@sgi.com>
Date: Mon, 9 Jul 2007 11:42:24 -0700
Subject: [IA64] SN: Correct ROM resource length for BIOS copy

On SN systems, when setting the IORESOURCE_ROM_BIOS_COPY resource flag,
the resource length should be set to the actual size of the ROM image
so that a call to pci_map_rom() returns the correct size.

Signed-off-by: John Keller <jpk@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 086a0e5a6318..acb9387c0364 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -560,6 +560,7 @@ void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void __iomem __must_check *pci_map_rom_copy(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
 void pci_remove_rom(struct pci_dev *pdev);
+size_t pci_get_rom_size(void __iomem *rom, size_t size);
 
 /* Power management related routines */
 int pci_save_state(struct pci_dev *dev);
-- 
cgit v1.2.3


From 149983af609e8f5c57157467baf8545d17b8a6a1 Mon Sep 17 00:00:00 2001
From: Dotan Barak <dotanb@dev.mellanox.co.il>
Date: Tue, 26 Jun 2007 15:55:28 +0300
Subject: mlx4_core: Get the maximum message size from reported device
 capabilities

Get the maximum message size from the device capabilities returned
from the QUERY_DEV_CAP firmware command, rather than hard-coding 2 GB.

Signed-off-by: Dotan Barak <dotanb@dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b372f5910fc1..8209387ee854 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -172,6 +172,7 @@ struct mlx4_caps {
 	int			num_pds;
 	int			reserved_pds;
 	int			mtt_entry_sz;
+	u32			max_msg_sz;
 	u32			page_size_cap;
 	u32			flags;
 	u16			stat_rate_support;
-- 
cgit v1.2.3


From 80128ff79d282cf71b1819dbca9b8dd47d8ed3e8 Mon Sep 17 00:00:00 2001
From: Vitaly Bordug <vitb@kernel.crashing.org>
Date: Mon, 9 Jul 2007 11:37:35 -0700
Subject: [POWERPC] 8xx: mpc885ads pcmcia support

Adds support for PowerQuicc on-chip PCMCIA.  The driver is implemented as
an of_device, so only arch/powerpc code is able to use it, which for now
means only the mpc885ads reference board.

To cope with the code that should be hooked inside driver, but is really board
specific (like set_voltage), global structure mpc8xx_pcmcia_ops holds
necessary function pointers that are filled in the BSP code.

[akpm@linux-foundation.org: whitespace diddles]
Signed-off-by: Vitaly Bordug <vitb@kernel.crashing.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Olof Johansson <olof@lixom.net>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 include/linux/fsl_devices.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 73710d617775..12e631f0fb77 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -120,5 +120,10 @@ struct fsl_spi_platform_data {
 	u32	sysclk;
 };
 
+struct mpc8xx_pcmcia_ops {
+	void(*hw_ctrl)(int slot, int enable);
+	int(*voltage_set)(int slot, int vcc, int vpp);
+};
+
 #endif /* _FSL_DEVICE_H_ */
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 4c62b53454a83178676e5ecae6665447d363c7b4 Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 27 Jun 2007 16:02:14 +0530
Subject: configfs: misc cleanups

1. item.c:config_item_cleanup() is a private function (only called by
config_item_release() in same file). However, it is spuriously
exported in include/linux/configfs.h, so remove that export and make
it static in item.c. Also, since it is no longer an exported interface
function, there is no need to keep a comment for it (the comment
was stating the obvious, anyway).

2. Kernel-doc comment format does not allow empty line between end of
comment and start of function (declaration line). There were several
such spurious empty lines in item.c, so fix them.

  fs/configfs/item.c       |   15 +++------------
  include/linux/configfs.h |    1 -
  2 files changed, 3 insertions(+), 13 deletions(-)

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index fef6f3d0a4a7..3d4a96eb0e9b 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -75,7 +75,6 @@ extern void config_item_init(struct config_item *);
 extern void config_item_init_type_name(struct config_item *item,
 				       const char *name,
 				       struct config_item_type *type);
-extern void config_item_cleanup(struct config_item *);
 
 extern struct config_item * config_item_get(struct config_item *);
 extern void config_item_put(struct config_item *);
-- 
cgit v1.2.3


From 9b1d9aa4e9c5cafe73b9df21d758b50b5d75264d Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 4 Jul 2007 16:37:06 +0530
Subject: [PATCH] configfs+dlm: Separate out __CONFIGFS_ATTR into configfs.h

fs/dlm/config.c contains a useful generic macro called __CONFIGFS_ATTR
that is similar to sysfs' __ATTR macro that makes defining attributes
easy for any user of configfs. Separate it out into configfs.h so that
other users (forthcoming in dynamic netconsole patchset) can use it too.

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: David Teigland <teigland@redhat.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 3d4a96eb0e9b..def7c83d43a2 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -130,6 +130,22 @@ struct configfs_attribute {
 	mode_t			ca_mode;
 };
 
+/*
+ * Users often need to create attribute structures for their configurable
+ * attributes, containing a configfs_attribute member and function pointers
+ * for the show() and store() operations on that attribute. They can use
+ * this macro (similar to sysfs' __ATTR) to make defining attributes easier.
+ */
+#define __CONFIGFS_ATTR(_name, _mode, _show, _store)			\
+{									\
+	.attr	= {							\
+			.ca_name = __stringify(_name),			\
+			.ca_mode = _mode,				\
+			.ca_owner = THIS_MODULE,			\
+	},								\
+	.show	= _show,						\
+	.store	= _store,						\
+}
 
 /*
  * If allow_link() exists, the item can symlink(2) out to other
-- 
cgit v1.2.3


From 3fe6c5ce1176cf661dbe71fc43b627c1a742a89a Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 4 Jul 2007 16:37:16 +0530
Subject: [PATCH] configfs+dlm: Rename config_group_find_obj and state
 semantics clearly

Configfs being based upon sysfs code, config_group_find_obj() is probably
so named because of the similar kset_find_obj() in sysfs. However,
"kobject"s in sysfs become "config_item"s in configfs, so let's call it
config_group_find_item() instead, for sake of uniformity, and make
corresponding change in the users of this function.

BTW a crucial difference between kset_find_obj and config_group_find_item
is in locking expectations. kset_find_obj does its locking by itself, but
config_group_find_item expects the *caller* to do the locking. The reason
for this: kset's have their own locks, config_group's don't but instead
rely on the subsystem mutex. And, subsystem needn't necessarily be around
when config_group_find_item() is called.

So let's state these locking semantics explicitly, and rectify the comment,
otherwise bugs could continue to occur in future, as they did in the past
(refer commit d82b8191e238 in gfs2-2.6-fixes.git).

[ I also took the opportunity to fix some bad whitespace and
double-empty lines. --Joel ]

[ Conflict in fs/dlm/config.c with commit
  3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ]

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: David Teigland <teigland@redhat.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index def7c83d43a2..bbb1b6cafa8b 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -86,12 +86,10 @@ struct config_item_type {
 	struct configfs_attribute		**ct_attrs;
 };
 
-
 /**
  *	group - a group of config_items of a specific type, belonging
  *	to a specific subsystem.
  */
-
 struct config_group {
 	struct config_item		cg_item;
 	struct list_head		cg_children;
@@ -99,13 +97,11 @@ struct config_group {
 	struct config_group		**default_groups;
 };
 
-
 extern void config_group_init(struct config_group *group);
 extern void config_group_init_type_name(struct config_group *group,
 					const char *name,
 					struct config_item_type *type);
 
-
 static inline struct config_group *to_config_group(struct config_item *item)
 {
 	return item ? container_of(item,struct config_group,cg_item) : NULL;
@@ -121,7 +117,8 @@ static inline void config_group_put(struct config_group *group)
 	config_item_put(&group->cg_item);
 }
 
-extern struct config_item *config_group_find_obj(struct config_group *, const char *);
+extern struct config_item *config_group_find_item(struct config_group *,
+						  const char *);
 
 
 struct configfs_attribute {
-- 
cgit v1.2.3


From e6bd07aee739566803425acdbf5cdb29919164e1 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 6 Jul 2007 23:33:17 -0700
Subject: configfs: Convert subsystem semaphore to mutex

Convert the su_sem member of struct configfs_subsystem to a struct
mutex, as that's what it is. Also convert all the users and update
Documentation/configfs.txt and Documentation/configfs_example.c
accordingly.

[ Conflict in fs/dlm/config.c with commit
  3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ]

Inspired-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index bbb1b6cafa8b..5ce0fc4e3b5b 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -40,9 +40,9 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/mutex.h>
 
 #include <asm/atomic.h>
-#include <asm/semaphore.h>
 
 #define CONFIGFS_ITEM_NAME_LEN	20
 
@@ -174,7 +174,7 @@ struct configfs_group_operations {
 
 struct configfs_subsystem {
 	struct config_group	su_group;
-	struct semaphore	su_sem;
+	struct mutex		su_mutex;
 };
 
 static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group)
-- 
cgit v1.2.3


From 299894cc9001b09e3e9685f2709b49e7e1092ccc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 6 Oct 2006 17:33:23 -0700
Subject: configfs: accessing item hierarchy during rmdir(2)

Add a notification callback, ops->disconnect_notify(). It has the same
prototype as ->drop_item(), but it will be called just before the item
linkage is broken. This way, configfs users who want to do work while
the object is still in the hierarchy have a chance.

Client drivers will still need to config_item_put() in their
->drop_item(), if they implement it.  They need do nothing in
->disconnect_notify().  They don't have to provide it if they don't
care.  But someone who wants to be notified before ci_parent is set to
NULL can now be notified.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 5ce0fc4e3b5b..8227e730dac7 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -169,6 +169,7 @@ struct configfs_group_operations {
 	struct config_item *(*make_item)(struct config_group *group, const char *name);
 	struct config_group *(*make_group)(struct config_group *group, const char *name);
 	int (*commit_item)(struct config_item *item);
+	void (*disconnect_notify)(struct config_group *group, struct config_item *item);
 	void (*drop_item)(struct config_group *group, struct config_item *item);
 };
 
-- 
cgit v1.2.3


From 631d1febab8e546e3bb800bdfe2c212b8adf87de Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 18 Jun 2007 18:06:09 -0700
Subject: configfs: config item dependencies.

Sometimes other drivers depend on particular configfs items.  For
example, ocfs2 mounts depend on a heartbeat region item.  If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly.  Not happy.

This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item().  A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on.  configfs will then return -EBUSY from rmdir(2) for that
item.  When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.

These APIs cannot be called underneath any configfs callbacks, as
they will conflict.  They can block and allocate.  A client driver
probably shouldn't be calling them of its own gumption.  Rather it should
be providing an API that external subsystems call.

How does this work?  Imagine the ocfs2 mount process.  When it mounts,
it asks for a heartbeat region item.  This is done via a call into the
heartbeat code.  Inside the heartbeat code, the region item is looked
up.  Here, the heartbeat code calls configfs_depend_item().  If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.

[ Fixed some bad whitespace in configfs.txt. --Mark ]

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 include/linux/configfs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 8227e730dac7..8c6967f3fb11 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -188,6 +188,11 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro
 int configfs_register_subsystem(struct configfs_subsystem *subsys);
 void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
 
+/* These functions can sleep and can alloc with GFP_KERNEL */
+/* WARNING: These cannot be called underneath configfs callbacks!! */
+int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
+void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target);
+
 #endif  /* __KERNEL__ */
 
 #endif /* _CONFIGFS_H_ */
-- 
cgit v1.2.3


From 45a66c1c3ff88e8050dd25e81bafdf79a12a8042 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 9 Jul 2007 11:46:13 -0700
Subject: libata-core: convert to use cancel_rearming_delayed_work()

We should not use cancel_work_sync(delayed_work->work). This works, but it
is not good practice. We can use cancel_rearming_delayed_work() instead,
which also simplifies the code.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index a3df64677ac3..bf98d44c8109 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -196,7 +196,6 @@ enum {
 	ATA_PFLAG_SCSI_HOTPLUG	= (1 << 6), /* SCSI hotplug scheduled */
 	ATA_PFLAG_INITIALIZING	= (1 << 7), /* being initialized, don't touch */
 
-	ATA_PFLAG_FLUSH_PORT_TASK = (1 << 16), /* flush port task */
 	ATA_PFLAG_SUSPENDED	= (1 << 17), /* port is suspended (power) */
 	ATA_PFLAG_PM_PENDING	= (1 << 18), /* PM operation pending */
 	ATA_PFLAG_GTM_VALID	= (1 << 19), /* acpi_gtm data valid */
-- 
cgit v1.2.3


From d583bc18812f8da52bf25eef9cd111e5fd46a6ab Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Wed, 4 Jul 2007 18:02:07 +0900
Subject: libata: simplify PCI legacy SFF host handling

With PCI resource fix up for legacy hosts, we can use the same code
path to allocate IO resources and initialize host for both legacy and
native SFF hosts.  Only IRQ requesting needs to be different.

Rename ata_pci_*_native_host() to ata_pci_*_sff_host(), kill all
legacy specific functions and use the renamed functions instead.  This
simplifies code a lot.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index bf98d44c8109..0c8b6578bd59 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -870,11 +870,11 @@ struct pci_bits {
 	unsigned long		val;
 };
 
-extern int ata_pci_init_native_host(struct ata_host *host);
+extern int ata_pci_init_sff_host(struct ata_host *host);
 extern int ata_pci_init_bmdma(struct ata_host *host);
-extern int ata_pci_prepare_native_host(struct pci_dev *pdev,
-				const struct ata_port_info * const * ppi,
-				struct ata_host **r_host);
+extern int ata_pci_prepare_sff_host(struct pci_dev *pdev,
+				    const struct ata_port_info * const * ppi,
+				    struct ata_host **r_host);
 extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits);
 extern unsigned long ata_pci_default_filter(struct ata_device *, unsigned long);
 #endif /* CONFIG_PCI */
-- 
cgit v1.2.3


From 75683fe7153c3817bb4fd4491e2a5913af6c463e Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 5 Jul 2007 13:31:27 +0900
Subject: libata: clean up horkage handling

Horkage handling had the following problems.

* dev->horkage was positioned after ATA_DEVICE_CLEAR_OFFSET, so it was
  cleared before the device is configured.  This broke
  HORKAGE_DIAGNOSTIC.

* Some used dev->horkage while others called ata_device_blacklisted()
  directly.  This was at best confusing.

This patch moves dev->horkage right after dev->flags and set the field
according to the blacklist during device configuration.  All users
test against dev->horkage.  ata_device_blacklisted() now has only one
user, make it static.  While at it, rename it to ata_dev_blacklisted()
for consistency.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0c8b6578bd59..47cd2a1c5544 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -434,6 +434,7 @@ struct ata_device {
 	struct ata_port		*ap;
 	unsigned int		devno;		/* 0 or 1 */
 	unsigned long		flags;		/* ATA_DFLAG_xxx */
+	unsigned int		horkage;	/* List of broken features */
 	struct scsi_device	*sdev;		/* attached SCSI device */
 #ifdef CONFIG_ATA_ACPI
 	acpi_handle		acpi_handle;
@@ -465,7 +466,6 @@ struct ata_device {
 	/* error history */
 	struct ata_ering	ering;
 	int			spdn_cnt;
-	unsigned int		horkage;	/* List of broken features */
 };
 
 /* Offset into struct ata_device.  Fields above it are maintained
@@ -793,7 +793,6 @@ extern void ata_id_string(const u16 *id, unsigned char *s,
 extern void ata_id_c_string(const u16 *id, unsigned char *s,
 			    unsigned int ofs, unsigned int len);
 extern void ata_id_to_dma_mode(struct ata_device *dev, u8 unknown);
-extern unsigned long ata_device_blacklisted(const struct ata_device *dev);
 extern void ata_bmdma_setup (struct ata_queued_cmd *qc);
 extern void ata_bmdma_start (struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
-- 
cgit v1.2.3


From 814600ee10d3c056ada315cdbdc2ebe48f54c75a Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sun, 1 Jul 2007 19:05:58 +0900
Subject: libata-link: add PMP related ATA constants

Add Port Multiplier related ATA constants and macros.  Some of these
will be used by ata_link implementation.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/ata.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 407dc7e098bc..b5a20162af32 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -164,6 +164,8 @@ enum {
 	ATA_CMD_SET_MAX		= 0xF9,
 	ATA_CMD_SET_MAX_EXT	= 0x37,
 	ATA_CMD_READ_LOG_EXT	= 0x2f,
+	ATA_CMD_PMP_READ	= 0xE4,
+	ATA_CMD_PMP_WRITE	= 0xE8,
 
 	/* READ_LOG_EXT pages */
 	ATA_LOG_SATA_NCQ	= 0x10,
@@ -212,6 +214,28 @@ enum {
 						   0=to device, 1=to host */
 	ATAPI_CDB_LEN		= 16,
 
+	/* PMP stuff */
+	SATA_PMP_MAX_PORTS	= 15,
+	SATA_PMP_CTRL_PORT	= 15,
+
+	SATA_PMP_GSCR_DWORDS	= 128,
+	SATA_PMP_GSCR_PROD_ID	= 0,
+	SATA_PMP_GSCR_REV	= 1,
+	SATA_PMP_GSCR_PORT_INFO	= 2,
+	SATA_PMP_GSCR_ERROR	= 32,
+	SATA_PMP_GSCR_ERROR_EN	= 33,
+	SATA_PMP_GSCR_FEAT	= 64,
+	SATA_PMP_GSCR_FEAT_EN	= 96,
+
+	SATA_PMP_PSCR_STATUS	= 0,
+	SATA_PMP_PSCR_ERROR	= 1,
+	SATA_PMP_PSCR_CONTROL	= 2,
+
+	SATA_PMP_FEAT_BIST	= (1 << 0),
+	SATA_PMP_FEAT_PMREQ	= (1 << 1),
+	SATA_PMP_FEAT_DYNSSC	= (1 << 2),
+	SATA_PMP_FEAT_NOTIFY	= (1 << 3),
+
 	/* cable types */
 	ATA_CBL_NONE		= 0,
 	ATA_CBL_PATA40		= 1,
@@ -418,4 +442,9 @@ static inline int lba_48_ok(u64 block, u32 n_block)
 	return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= 65536);
 }
 
+#define sata_pmp_gscr_vendor(gscr)	((gscr)[SATA_PMP_GSCR_PROD_ID] & 0xffff)
+#define sata_pmp_gscr_devid(gscr)	((gscr)[SATA_PMP_GSCR_PROD_ID] >> 16)
+#define sata_pmp_gscr_rev(gscr)		(((gscr)[SATA_PMP_GSCR_REV] >> 8) & 0xff)
+#define sata_pmp_gscr_ports(gscr)	((gscr)[SATA_PMP_GSCR_PORT_INFO] & 0xf)
+
 #endif /* __LINUX_ATA_H__ */
-- 
cgit v1.2.3


From 88be9f990fe70f0f177ef44a16a477599e91f825 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2007 10:42:27 -0400
Subject: NFS: Replace vfsmount and dentry in nfs_open_context with struct path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0543439a97af..07eea8f64ecf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -31,6 +31,7 @@
 
 #include <linux/in.h>
 #include <linux/mm.h>
+#include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
@@ -70,8 +71,7 @@ struct nfs_access_entry {
 struct nfs4_state;
 struct nfs_open_context {
 	atomic_t count;
-	struct vfsmount *vfsmnt;
-	struct dentry *dentry;
+	struct path path;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 	fl_owner_t lockowner;
-- 
cgit v1.2.3


From aa53ed541a1fec78a78d02afc8b042d040cc080d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 5 Jun 2007 14:49:03 -0400
Subject: NFS4: on a O_EXCL OPEN make sure SETATTR sets the fields holding the
 verifier

The Linux NFS4 client simply skips over the bitmask in an O_EXCL open
call and so it doesn't bother to reset any fields that may be holding
the verifier. This patch has us save the first two words of the bitmask
(which is all the current client has #defines for). The client then
later checks this bitmask and turns on the appropriate flags in the
sattr->ia_verify field for the following SETATTR call.

This patch only currently checks to see if the server used the atime
and mtime slots for the verifier (which is what the Linux server uses
for this). I'm not sure of what other fields the server could
reasonably use, but adding checks for others should be trivial.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h    | 1 +
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7e7f33a38fc0..8726491de154 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 
+#define NFS4_BITMAP_SIZE	2
 #define NFS4_VERIFIER_SIZE	8
 #define NFS4_STATEID_SIZE	16
 #define NFS4_FHSIZE		128
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 10c26ed0db71..f7100df3a690 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -144,6 +144,7 @@ struct nfs_openres {
 	nfs4_stateid		delegation;
 	__u32			do_recall;
 	__u64			maxsize;
+	__u32			attrset[NFS4_BITMAP_SIZE];
 };
 
 /*
-- 
cgit v1.2.3


From c03b40246123b2ced79e2620d1d2c089bb12369a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 13:26:38 -0400
Subject: NFS: Convert struct nfs_page to use krefs

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index bd193af80162..c780e7e39f99 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -16,7 +16,7 @@
 #include <linux/sunrpc/auth.h>
 #include <linux/nfs_xdr.h>
 
-#include <asm/atomic.h>
+#include <linux/kref.h>
 
 /*
  * Valid flags for the radix tree
@@ -42,7 +42,7 @@ struct nfs_page {
 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
 				wb_pgbase,	/* Start of page data */
 				wb_bytes;	/* Length of request */
-	atomic_t		wb_count;	/* reference count */
+	struct kref		wb_kref;	/* reference count */
 	unsigned long		wb_flags;
 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
 };
@@ -89,7 +89,7 @@ extern  void nfs_clear_page_writeback(struct nfs_page *req);
 
 
 /*
- * Lock the page of an asynchronous request without incrementing the wb_count
+ * Lock the page of an asynchronous request without getting a new reference
  */
 static inline int
 nfs_lock_request_dontget(struct nfs_page *req)
@@ -98,14 +98,14 @@ nfs_lock_request_dontget(struct nfs_page *req)
 }
 
 /*
- * Lock the page of an asynchronous request
+ * Lock the page of an asynchronous request and take a reference
  */
 static inline int
 nfs_lock_request(struct nfs_page *req)
 {
 	if (test_and_set_bit(PG_BUSY, &req->wb_flags))
 		return 0;
-	atomic_inc(&req->wb_count);
+	kref_get(&req->wb_kref);
 	return 1;
 }
 
-- 
cgit v1.2.3


From 9fd367f0f376ccfb2592eed9be0eece70429894f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 15:10:24 -0400
Subject: NFS cleanup: Rename NFS_PAGE_TAG_WRITEBACK to NFS_PAGE_TAG_LOCKED

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index c780e7e39f99..042434c39b7e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -21,7 +21,7 @@
 /*
  * Valid flags for the radix tree
  */
-#define NFS_PAGE_TAG_WRITEBACK	0
+#define NFS_PAGE_TAG_LOCKED	0
 
 /*
  * Valid flags for a dirty buffer
@@ -84,8 +84,7 @@ extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
-extern  int nfs_set_page_writeback_locked(struct nfs_page *req);
-extern  void nfs_clear_page_writeback(struct nfs_page *req);
+extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
 
 /*
-- 
cgit v1.2.3


From 5c36968343fcd013a3f7ae93f246c2e75596780b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 15:27:42 -0400
Subject: NFS cleanup: speed up nfs_scan_commit using radix tree tags

Add a tag for requests that are waiting for a COMMIT

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 042434c39b7e..481a42105d69 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,6 +22,7 @@
  * Valid flags for the radix tree
  */
 #define NFS_PAGE_TAG_LOCKED	0
+#define NFS_PAGE_TAG_COMMIT	1
 
 /*
  * Valid flags for a dirty buffer
@@ -71,8 +72,8 @@ extern	void nfs_clear_request(struct nfs_page *req);
 extern	void nfs_release_request(struct nfs_page *req);
 
 
-extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
-			  pgoff_t idx_start, unsigned int npages);
+extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+			  pgoff_t idx_start, unsigned int npages, int tag);
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
-- 
cgit v1.2.3


From 2aefa104313996d1a9582476cee53d1296c834bf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 15:40:59 -0400
Subject: NFS: Remove the redundant 'dirty' and 'commit' lists from nfs_inode

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h   | 5 +----
 include/linux/nfs_page.h | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 07eea8f64ecf..a94205476736 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -156,12 +156,9 @@ struct nfs_inode {
 	 * This is the list of dirty unwritten pages.
 	 */
 	spinlock_t		req_lock;
-	struct list_head	dirty;
-	struct list_head	commit;
 	struct radix_tree_root	nfs_page_tree;
 
-	unsigned int		ndirty,
-				ncommit,
+	unsigned int		ncommit,
 				npages;
 
 	/* Open contexts for shared mmap writes */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 481a42105d69..78e60798d10e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -34,8 +34,7 @@
 
 struct nfs_inode;
 struct nfs_page {
-	struct list_head	wb_list,	/* Defines state of page: */
-				*wb_list_head;	/*      read/write/commit */
+	struct list_head	wb_list;	/* Defines state of page: */
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	atomic_t		wb_complete;	/* i/os we're waiting for */
@@ -118,7 +117,6 @@ static inline void
 nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 {
 	list_add_tail(&req->wb_list, head);
-	req->wb_list_head = head;
 }
 
 
@@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req)
 	if (list_empty(&req->wb_list))
 		return;
 	list_del_init(&req->wb_list);
-	req->wb_list_head = NULL;
 }
 
 static inline struct nfs_page *
-- 
cgit v1.2.3


From dce34ce298d85b81630401f4feb4bd7ac77fe9c7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 15:47:53 -0400
Subject: NFS: Prevent integer overflow in nfs_scan_list()

Also ensure that nfs_inode ncommit and npages are large enough to represent
all possible values for the number of pages.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a94205476736..750708ccd708 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -158,7 +158,7 @@ struct nfs_inode {
 	spinlock_t		req_lock;
 	struct radix_tree_root	nfs_page_tree;
 
-	unsigned int		ncommit,
+	unsigned long		ncommit,
 				npages;
 
 	/* Open contexts for shared mmap writes */
-- 
cgit v1.2.3


From 3bec63db55463365110d00721ed60a31e4614cb6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Jun 2007 16:02:44 -0400
Subject: NFS: Convert struct nfs_open_context to use a kref

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 750708ccd708..bf24151d63be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -30,6 +30,7 @@
 #ifdef __KERNEL__
 
 #include <linux/in.h>
+#include <linux/kref.h>
 #include <linux/mm.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
@@ -70,7 +71,7 @@ struct nfs_access_entry {
 
 struct nfs4_state;
 struct nfs_open_context {
-	atomic_t count;
+	struct kref kref;
 	struct path path;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
-- 
cgit v1.2.3


From 6529eba08fe7297852391a468d95322913de73fa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2007 16:40:14 -0400
Subject: SUNRPC: Move rpc_task->tk_task list into struct rpc_clnt

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h  | 4 ++++
 include/linux/sunrpc/sched.h | 5 -----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 66611423c8ee..0801ab5407ce 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -26,6 +26,8 @@ struct rpc_inode;
 struct rpc_clnt {
 	atomic_t		cl_count;	/* Number of clones */
 	atomic_t		cl_users;	/* number of references */
+	struct list_head	cl_clients;	/* Global list of clients */
+	struct list_head	cl_tasks;	/* List of tasks */
 	struct rpc_xprt *	cl_xprt;	/* transport */
 	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
@@ -122,6 +124,8 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+void		rpc_register_client(struct rpc_clnt *);
+void		rpc_unregister_client(struct rpc_clnt *);
 int		rpcb_register(u32, u32, int, unsigned short, int *);
 void		rpcb_getport(struct rpc_task *);
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 2047fb202a13..3387b008cdfc 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -110,11 +110,6 @@ struct rpc_task {
 	if (!list_empty(head) &&  \
 	    ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
 
-/* .. and walking list of all tasks */
-#define	alltask_for_each(task, pos, head) \
-	list_for_each(pos, head) \
-		if ((task=list_entry(pos, struct rpc_task, tk_task)),1)
-
 typedef void			(*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
-- 
cgit v1.2.3


From 4bef61ff7514396419563ca54fd42ef846485b06 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 16 Jun 2007 14:17:01 -0400
Subject: SUNRPC: Add a per-rpc_clnt spinlock

Use that to protect the rpc_clnt->cl_tasks list instead of using a global
lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 0801ab5407ce..2f4b520a7419 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -28,6 +28,7 @@ struct rpc_clnt {
 	atomic_t		cl_users;	/* number of references */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
+	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt *	cl_xprt;	/* transport */
 	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
-- 
cgit v1.2.3


From 34f52e3591f241b825353ba27def956d8487c400 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2007 16:40:31 -0400
Subject: SUNRPC: Convert rpc_clnt->cl_users to a kref

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 2f4b520a7419..003d8ea70c19 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -24,8 +24,8 @@ struct rpc_inode;
  * The high-level client handle
  */
 struct rpc_clnt {
+	struct kref		cl_kref;	/* Number of references */
 	atomic_t		cl_count;	/* Number of clones */
-	atomic_t		cl_users;	/* number of references */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
-- 
cgit v1.2.3


From 848f1fe6be2e290691bb6c13cbb8fd92bd0cfaab Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 9 Jun 2007 19:39:12 -0400
Subject: SUNRPC: Kill rpc_clnt->cl_dead

Its use is at best racy, and there is only one user (lockd), which has
additional locking that makes the whole thing redundant.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 003d8ea70c19..ab3ef6d629a7 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,8 +45,7 @@ struct rpc_clnt {
 				cl_intr     : 1,/* interruptible */
 				cl_discrtry : 1,/* disconnect before retry */
 				cl_autobind : 1,/* use getport() */
-				cl_oneshot  : 1,/* dispose after use */
-				cl_dead     : 1;/* abandoned */
+				cl_oneshot  : 1;/* dispose after use */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
 
-- 
cgit v1.2.3


From 90c5755ff5111ffdcca10a1e8a823dba29f37b6d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 9 Jun 2007 19:49:36 -0400
Subject: SUNRPC: Kill rpc_clnt->cl_oneshot

Replace it with explicit calls to rpc_shutdown_client() or
rpc_destroy_client() (for the case of asynchronous calls).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab3ef6d629a7..fe7ea65ed0ae 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -44,8 +44,7 @@ struct rpc_clnt {
 	unsigned int		cl_softrtry : 1,/* soft timeouts */
 				cl_intr     : 1,/* interruptible */
 				cl_discrtry : 1,/* disconnect before retry */
-				cl_autobind : 1,/* use getport() */
-				cl_oneshot  : 1;/* dispose after use */
+				cl_autobind : 1;/* use getport() */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
 
@@ -112,10 +111,9 @@ struct rpc_create_args {
 #define RPC_CLNT_CREATE_HARDRTRY	(1UL << 0)
 #define RPC_CLNT_CREATE_INTR		(1UL << 1)
 #define RPC_CLNT_CREATE_AUTOBIND	(1UL << 2)
-#define RPC_CLNT_CREATE_ONESHOT		(1UL << 3)
-#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 4)
-#define RPC_CLNT_CREATE_NOPING		(1UL << 5)
-#define RPC_CLNT_CREATE_DISCRTRY	(1UL << 6)
+#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 3)
+#define RPC_CLNT_CREATE_NOPING		(1UL << 4)
+#define RPC_CLNT_CREATE_DISCRTRY	(1UL << 5)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
-- 
cgit v1.2.3


From 4c402b40970382ded616eadd544fd63feb76cc79 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2007 16:40:32 -0400
Subject: SUNRPC: Remove rpc_clnt->cl_count

The kref now does most of what cl_count + cl_users used to do. The only
remaining role for cl_count is to tell us if we are in a 'shutdown'
phase. We can provide that information using a single bit field instead
of a full atomic counter.

Also rename rpc_destroy_client() to rpc_close_client(), which reflects
better what its role is these days.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index fe7ea65ed0ae..cf03494c36e7 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -25,7 +25,6 @@ struct rpc_inode;
  */
 struct rpc_clnt {
 	struct kref		cl_kref;	/* Number of references */
-	atomic_t		cl_count;	/* Number of clones */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
@@ -119,8 +118,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
-int		rpc_shutdown_client(struct rpc_clnt *);
-int		rpc_destroy_client(struct rpc_clnt *);
+void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 void		rpc_register_client(struct rpc_clnt *);
 void		rpc_unregister_client(struct rpc_clnt *);
-- 
cgit v1.2.3


From f61534dfd38f895b203e2aadaba04f21a992ca8c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2007 17:31:58 -0400
Subject: SUNRPC: Remove redundant calls to rpciod_up()/rpciod_down()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 52b4378311c8..144d955dc46a 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -16,7 +16,6 @@ struct nfs_client {
 #define NFS_CS_INITING		1		/* busy initialising */
 	int			cl_nfsversion;	/* NFS protocol version */
 	unsigned long		cl_res_state;	/* NFS resources state */
-#define NFS_CS_RPCIOD		0		/* - rpciod started */
 #define NFS_CS_CALLBACK		1		/* - callback started */
 #define NFS_CS_IDMAP		2		/* - idmap started */
 #define NFS_CS_RENEWD		3		/* - renewd started */
-- 
cgit v1.2.3


From 188fef11db219f13f32d055ba59985e7d1a349fe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 16 Jun 2007 14:18:40 -0400
Subject: SUNRPC: Move rpc_register_client and friends into net/sunrpc/clnt.c

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index cf03494c36e7..a451351c7eff 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -120,8 +120,6 @@ struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
-void		rpc_register_client(struct rpc_clnt *);
-void		rpc_unregister_client(struct rpc_clnt *);
 int		rpcb_register(u32, u32, int, unsigned short, int *);
 void		rpcb_getport(struct rpc_task *);
 
-- 
cgit v1.2.3


From 4a8c1344dccb848dbcf0edabc8b5c51a8ecf2808 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 7 Jun 2007 10:14:14 -0400
Subject: SUNRPC: Add a backpointer from the struct rpc_cred to the rpc_auth

Cleans up an issue whereby rpcsec_gss uses the rpc_clnt->cl_auth. If we want
to be able to add several rpc_auths to a single rpc_clnt, then this abuse
must go.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 534cdc7be58d..8ef27afeea73 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -30,8 +30,11 @@ struct auth_cred {
 /*
  * Client user credentials
  */
+struct rpc_auth;
+struct rpc_credops;
 struct rpc_cred {
 	struct hlist_node	cr_hash;	/* hash chain */
+	struct rpc_auth *	cr_auth;
 	struct rpc_credops *	cr_ops;
 	unsigned long		cr_expire;	/* when to gc */
 	atomic_t		cr_count;	/* ref count */
@@ -60,6 +63,7 @@ struct rpc_cred_cache {
 	unsigned long		expire;		/* cache expiry interval */
 };
 
+struct rpc_authops;
 struct rpc_auth {
 	unsigned int		au_cslack;	/* call cred size estimate */
 				/* guess at number of u32's auth adds before
-- 
cgit v1.2.3


From 6e84c7b66a0aa0be16a7728d1e687c57978dac2c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 7 Jun 2007 15:31:36 -0400
Subject: SUNRPC: Add a downcall queue to struct rpc_inode

Currently, the downcall queue is tied to the struct gss_auth, which means
that different RPCSEC_GSS pseudoflavours must use different upcall pipes.
Add a list to struct rpc_inode that can be used instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index ad293760f6eb..430cea104817 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -23,6 +23,7 @@ struct rpc_inode {
 	void *private;
 	struct list_head pipe;
 	struct list_head in_upcall;
+	struct list_head in_downcall;
 	int pipelen;
 	int nreaders;
 	int nwriters;
-- 
cgit v1.2.3


From 03a1256f06cf1f58e33971fb4a524479e75c200e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 8 Jun 2007 14:14:53 -0400
Subject: SUNRPC: Add a field to track the number of kernel users of an
 rpc_pipe

This allows us to correctly deduce when we need to remove the pipe.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 430cea104817..51b977a4ca20 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -27,6 +27,7 @@ struct rpc_inode {
 	int pipelen;
 	int nreaders;
 	int nwriters;
+	int nkern_readwriters;
 	wait_queue_head_t waitq;
 #define RPC_PIPE_WAIT_FOR_OPEN	1
 	int flags;
-- 
cgit v1.2.3


From 3ab9bb7243489f9db3abf3d05521ddfc6b184c0a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 9 Jun 2007 15:41:42 -0400
Subject: SUNRPC: Fix a memory leak in the auth credcache code

The leak only affects the RPCSEC_GSS caches, since they are the only ones
that are dynamically allocated...
Rename the existing rpcauth_free_credcache() to rpcauth_clear_credcache()
in order to better describe its role, then add a new function
rpcauth_destroy_credcache() that actually frees the cache in addition to
clearing it out.

Also move the call to destroy the credcache in gss_destroy() to come before
the rpc upcall pipe is unlinked.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 8ef27afeea73..3972b8414c88 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -143,7 +143,8 @@ int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
 int			rpcauth_init_credcache(struct rpc_auth *, unsigned long);
-void			rpcauth_free_credcache(struct rpc_auth *);
+void			rpcauth_destroy_credcache(struct rpc_auth *);
+void			rpcauth_clear_credcache(struct rpc_cred_cache *);
 
 static inline
 struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
-- 
cgit v1.2.3


From 64c91a1f1c8bc4295fd6b90df8adf911a7dd64f4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 10:17:16 -0400
Subject: SUNRPC: Make rpc_ping() static

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a451351c7eff..a0e51e193284 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -136,7 +136,6 @@ void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
-int		rpc_ping(struct rpc_clnt *clnt, int flags);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 
-- 
cgit v1.2.3


From 5e1550d6a2c2dd33ff0ca5febefd8e9c65c6ca1e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 10:17:16 -0400
Subject: SUNRPC: Add the helper function 'rpc_call_null()'

Does a NULL RPC call and returns a pointer to the resulting rpc_task. The
call may be either synchronous or asynchronous.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a0e51e193284..097984b03857 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -130,6 +130,8 @@ int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
 			       void *calldata);
 int		rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
 			      int flags);
+struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
+			       int flags);
 void		rpc_restart_call(struct rpc_task *);
 void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
-- 
cgit v1.2.3


From de7a8ce38aea529876db3890b61947bc4bc004da Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 10:46:47 -0400
Subject: SUNRPC: Rename rpcauth_destroy() to rpcauth_release()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3972b8414c88..bc77c730325c 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -128,7 +128,7 @@ extern struct rpc_authops	authdes_ops;
 int			rpcauth_register(struct rpc_authops *);
 int			rpcauth_unregister(struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
-void			rpcauth_destroy(struct rpc_auth *);
+void			rpcauth_release(struct rpc_auth *);
 struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
 struct rpc_cred *	rpcauth_bindcred(struct rpc_task *);
-- 
cgit v1.2.3


From f1c0a8615090359d57e096157feb9f900cbb233c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 20:17:58 -0400
Subject: SUNRPC: Mark auth and cred operation tables as constant.

Also do the same for gss_api operation tables.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h    | 15 ++++++---------
 include/linux/sunrpc/gss_api.h |  2 +-
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index bc77c730325c..e606c2804685 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -35,7 +35,7 @@ struct rpc_credops;
 struct rpc_cred {
 	struct hlist_node	cr_hash;	/* hash chain */
 	struct rpc_auth *	cr_auth;
-	struct rpc_credops *	cr_ops;
+	const struct rpc_credops *cr_ops;
 	unsigned long		cr_expire;	/* when to gc */
 	atomic_t		cr_count;	/* ref count */
 	unsigned short		cr_flags;	/* various flags */
@@ -73,7 +73,7 @@ struct rpc_auth {
 	unsigned int		au_verfsize;
 
 	unsigned int		au_flags;	/* various flags */
-	struct rpc_authops *	au_ops;		/* operations */
+	const struct rpc_authops *au_ops;		/* operations */
 	rpc_authflavor_t	au_flavor;	/* pseudoflavor (note may
 						 * differ from the flavor in
 						 * au_ops->au_flavor in gss
@@ -119,14 +119,11 @@ struct rpc_credops {
 						void *, __be32 *, void *);
 };
 
-extern struct rpc_authops	authunix_ops;
-extern struct rpc_authops	authnull_ops;
-#ifdef CONFIG_SUNRPC_SECURE
-extern struct rpc_authops	authdes_ops;
-#endif
+extern const struct rpc_authops	authunix_ops;
+extern const struct rpc_authops	authnull_ops;
 
-int			rpcauth_register(struct rpc_authops *);
-int			rpcauth_unregister(struct rpc_authops *);
+int			rpcauth_register(const struct rpc_authops *);
+int			rpcauth_unregister(const struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 5eca9e442051..bbac101ac372 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -77,7 +77,7 @@ struct gss_api_mech {
 	struct module		*gm_owner;
 	struct xdr_netobj	gm_oid;
 	char			*gm_name;
-	struct gss_api_ops	*gm_ops;
+	const struct gss_api_ops *gm_ops;
 	/* pseudoflavors supported by this mechanism: */
 	int			gm_pf_num;
 	struct pf_desc *	gm_pfs;
-- 
cgit v1.2.3


From 5fe4755e2526a2aa82b7ed8daeb3aed74a236925 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 19:55:31 -0400
Subject: SUNRPC: Clean up rpc credential initialisation

Add a helper rpcauth_init_cred()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h     | 1 +
 include/linux/sunrpc/auth_gss.h | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index e606c2804685..d5bfc67461fc 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -127,6 +127,7 @@ int			rpcauth_unregister(const struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
+void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
 struct rpc_cred *	rpcauth_bindcred(struct rpc_task *);
 void			rpcauth_holdcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 2db2fbf34947..0bd1d06777b9 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -85,11 +85,6 @@ struct gss_cred {
 	struct gss_upcall_msg	*gc_upcall;
 };
 
-#define gc_uid			gc_base.cr_uid
-#define gc_count		gc_base.cr_count
-#define gc_flags		gc_base.cr_flags
-#define gc_expire		gc_base.cr_expire
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_AUTH_GSS_H */
 
-- 
cgit v1.2.3


From fc432dd90760a629c57026e57f65ff80a1a31d2f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 25 Jun 2007 10:15:15 -0400
Subject: SUNRPC: Enforce atomic updates of rpc_cred->cr_flags

Convert to the use of atomic bitops...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d5bfc67461fc..8586503d5ebd 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -36,19 +36,19 @@ struct rpc_cred {
 	struct hlist_node	cr_hash;	/* hash chain */
 	struct rpc_auth *	cr_auth;
 	const struct rpc_credops *cr_ops;
-	unsigned long		cr_expire;	/* when to gc */
-	atomic_t		cr_count;	/* ref count */
-	unsigned short		cr_flags;	/* various flags */
 #ifdef RPC_DEBUG
 	unsigned long		cr_magic;	/* 0x0f4aa4f0 */
 #endif
+	unsigned long		cr_expire;	/* when to gc */
+	unsigned long		cr_flags;	/* various flags */
+	atomic_t		cr_count;	/* ref count */
 
 	uid_t			cr_uid;
 
 	/* per-flavor data */
 };
-#define RPCAUTH_CRED_NEW	0x0001
-#define RPCAUTH_CRED_UPTODATE	0x0002
+#define RPCAUTH_CRED_NEW	0
+#define RPCAUTH_CRED_UPTODATE	1
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
-- 
cgit v1.2.3


From e092bdcd939416ef911090890096fe07d0281a5e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 23 Jun 2007 19:45:36 -0400
Subject: SUNRPC: cleanup rpc credential cache garbage collection

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 8586503d5ebd..4e78f0c5f014 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -34,6 +34,7 @@ struct rpc_auth;
 struct rpc_credops;
 struct rpc_cred {
 	struct hlist_node	cr_hash;	/* hash chain */
+	struct list_head	cr_lru;		/* lru garbage collection */
 	struct rpc_auth *	cr_auth;
 	const struct rpc_credops *cr_ops;
 #ifdef RPC_DEBUG
-- 
cgit v1.2.3


From 31be5bf15f3dafffce110eb1afadccbf2e3067b4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 24 Jun 2007 15:55:26 -0400
Subject: SUNRPC: Convert the credcache lookup code to use RCU

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 4e78f0c5f014..5974e8a493c4 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/xdr.h>
 
 #include <asm/atomic.h>
+#include <linux/rcupdate.h>
 
 /* size of the nodename buffer */
 #define UNX_MAXNODENAME	32
@@ -35,6 +36,7 @@ struct rpc_credops;
 struct rpc_cred {
 	struct hlist_node	cr_hash;	/* hash chain */
 	struct list_head	cr_lru;		/* lru garbage collection */
+	struct rcu_head		cr_rcu;
 	struct rpc_auth *	cr_auth;
 	const struct rpc_credops *cr_ops;
 #ifdef RPC_DEBUG
@@ -50,6 +52,7 @@ struct rpc_cred {
 };
 #define RPCAUTH_CRED_NEW	0
 #define RPCAUTH_CRED_UPTODATE	1
+#define RPCAUTH_CRED_HASHED	2
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
-- 
cgit v1.2.3


From 9499b4341b56935f61af9e7e354e7d11e70f5258 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 24 Jun 2007 15:57:57 -0400
Subject: SUNRPC: Give credential cache a local spinlock

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 5974e8a493c4..e5a3b5141ed2 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -63,6 +63,7 @@ struct rpc_cred {
 #define RPC_CREDCACHE_MASK	(RPC_CREDCACHE_NR - 1)
 struct rpc_cred_cache {
 	struct hlist_head	hashtable[RPC_CREDCACHE_NR];
+	spinlock_t		lock;
 	unsigned long		nextgc;		/* next garbage collection */
 	unsigned long		expire;		/* cache expiry interval */
 };
@@ -126,6 +127,8 @@ struct rpc_credops {
 extern const struct rpc_authops	authunix_ops;
 extern const struct rpc_authops	authnull_ops;
 
+void __init		rpc_init_authunix(void);
+
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
-- 
cgit v1.2.3


From f5c2187cfef628784d8a09b6d0f77888246d0c0f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 25 Jun 2007 17:11:20 -0400
Subject: SUNRPC: Convert the credential garbage collector into a shrinker
 callback

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index e5a3b5141ed2..7a69ca3bebaf 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -64,8 +64,6 @@ struct rpc_cred {
 struct rpc_cred_cache {
 	struct hlist_head	hashtable[RPC_CREDCACHE_NR];
 	spinlock_t		lock;
-	unsigned long		nextgc;		/* next garbage collection */
-	unsigned long		expire;		/* cache expiry interval */
 };
 
 struct rpc_authops;
@@ -128,6 +126,8 @@ extern const struct rpc_authops	authunix_ops;
 extern const struct rpc_authops	authnull_ops;
 
 void __init		rpc_init_authunix(void);
+void __init		rpcauth_init_module(void);
+void __exit		rpcauth_remove_module(void);
 
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
@@ -147,7 +147,7 @@ int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
-int			rpcauth_init_credcache(struct rpc_auth *, unsigned long);
+int			rpcauth_init_credcache(struct rpc_auth *);
 void			rpcauth_destroy_credcache(struct rpc_auth *);
 void			rpcauth_clear_credcache(struct rpc_cred_cache *);
 
-- 
cgit v1.2.3


From 5d28dc82074f1e64b22c9424b161abc1f5d6bcdb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jun 2007 19:18:38 -0400
Subject: SUNRPC: Convert gss_ctx_lock to an RCU lock

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth_gss.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 0bd1d06777b9..67658e17a375 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -75,6 +75,7 @@ struct gss_cl_ctx {
 	struct xdr_netobj	gc_wire_ctx;
 	u32			gc_win;
 	unsigned long		gc_expiry;
+	struct rcu_head		gc_rcu;
 };
 
 struct gss_upcall_msg;
-- 
cgit v1.2.3


From 1be27f36601973815171db684c711d30557cf50c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 27 Jun 2007 14:29:04 -0400
Subject: SUNRPC: Remove the tk_auth macro...

We should almost always be dereferencing the rpc_auth struct by means of the
credential's cr_auth field instead of the rpc_clnt->cl_auth anyway. Fix up
that historical mistake, and remove the macro that propagated it.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 3387b008cdfc..8ea077db0099 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -98,7 +98,6 @@ struct rpc_task {
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
 };
-#define tk_auth			tk_client->cl_auth
 #define tk_xprt			tk_client->cl_xprt
 
 /* support walking a list of tasks on a wait queue */
-- 
cgit v1.2.3


From 587142f85f796cf0b823dd3080e815f02ff6b952 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 2 Jul 2007 09:57:54 -0400
Subject: NFS: Replace NFS_I(inode)->req_lock with inode->i_lock

There is no justification for keeping a special spinlock for the exclusive
use of the NFS writeback code.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bf24151d63be..cf395351cdd4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -156,7 +156,6 @@ struct nfs_inode {
 	/*
 	 * This is the list of dirty unwritten pages.
 	 */
-	spinlock_t		req_lock;
 	struct radix_tree_root	nfs_page_tree;
 
 	unsigned long		ncommit,
-- 
cgit v1.2.3


From 7af654f8d1b7460415af5d1d326233478dd0f563 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 2 Jul 2007 12:49:23 -0400
Subject: NFSv4: Don't reuse expired nfs4_state_owner structs

That just confuses certain NFSv4 servers.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 144d955dc46a..2cef0a68aa77 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -44,8 +44,6 @@ struct nfs_client {
 
 	struct list_head	cl_delegations;
 	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
 	spinlock_t		cl_lock;
 
 	unsigned long		cl_lease_time;
-- 
cgit v1.2.3


From 9f958ab8858c75df800e0121b1920182820cbc39 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 2 Jul 2007 13:58:33 -0400
Subject: NFSv4: Reduce the chances of an open_owner identifier collision

Currently we just use a 32-bit counter.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 5 +++--
 include/linux/nfs_xdr.h   | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2cef0a68aa77..0cac49bc0955 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -34,7 +34,8 @@ struct nfs_client {
 	nfs4_verifier		cl_confirm;
 	unsigned long		cl_state;
 
-	u32			cl_lockowner_id;
+	struct rb_root		cl_openowner_id;
+	struct rb_root		cl_lockowner_id;
 
 	/*
 	 * The following rwsem ensures exclusive access to the server
@@ -43,7 +44,7 @@ struct nfs_client {
 	struct rw_semaphore	cl_sem;
 
 	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
+	struct rb_root		cl_state_owners;
 	spinlock_t		cl_lock;
 
 	unsigned long		cl_lease_time;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f7100df3a690..38d77681cf27 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -119,7 +119,7 @@ struct nfs_openargs {
 	struct nfs_seqid *	seqid;
 	int			open_flags;
 	__u64                   clientid;
-	__u32                   id;
+	__u64                   id;
 	union {
 		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
 		nfs4_verifier   verifier; /* EXCLUSIVE */
@@ -181,7 +181,7 @@ struct nfs_closeres {
  *   */
 struct nfs_lowner {
 	__u64			clientid;
-	u32			id;
+	__u64			id;
 };
 
 struct nfs_lock_args {
-- 
cgit v1.2.3


From 412c77cee6d6e73fbe1dc3d67f52163efed33fc4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jul 2007 16:10:55 -0400
Subject: NFSv4: Defer inode revalidation when setting up a delegation

Currently we force a synchronous call to __nfs_revalidate_inode() in
nfs_inode_set_delegation(). This not only ensures that we cannot call
nfs_inode_set_delegation from an asynchronous context, but it also slows
down any call to open().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cf395351cdd4..e94971040de9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -184,6 +184,7 @@ struct nfs_inode {
 #define NFS_INO_INVALID_ACCESS	0x0008		/* cached access cred invalid */
 #define NFS_INO_INVALID_ACL	0x0010		/* cached acls are invalid */
 #define NFS_INO_REVAL_PAGECACHE	0x0020		/* must revalidate pagecache */
+#define NFS_INO_REVAL_FORCED	0x0040		/* force revalidation ignoring a delegation */
 
 /*
  * Bit offsets in flags field
-- 
cgit v1.2.3


From 433c92379d9c2c59c2ebc7628fe4fb02cfc2daf8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:12:14 -0400
Subject: NFS: Clean up nfs_size_to_loff_t()

Use the same file size limit that lockd uses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e94971040de9..7deb5b0347f7 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -503,12 +503,10 @@ extern int  nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *,
  * inline functions
  */
 
-static inline loff_t
-nfs_size_to_loff_t(__u64 size)
+static inline loff_t nfs_size_to_loff_t(__u64 size)
 {
-	loff_t maxsz = (((loff_t) ULONG_MAX) << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE - 1;
-	if (size > maxsz)
-		return maxsz;
+	if (size > (__u64) OFFSET_MAX - 1)
+		return OFFSET_MAX - 1;
 	return (loff_t) size;
 }
 
-- 
cgit v1.2.3


From 5680d48be88d12cd987e5579a6072a4ca34ca6ea Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:12:24 -0400
Subject: NFS: Clean-up: Define macros for maximum host and export path name
 lengths

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_mount.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index cc8b9c59acb8..0b82a17c705b 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -37,7 +37,7 @@ struct nfs_mount_data {
 	int		acdirmin;		/* 1 */
 	int		acdirmax;		/* 1 */
 	struct sockaddr_in addr;		/* 1 */
-	char		hostname[256];		/* 1 */
+	char		hostname[NFS_MAXNAMLEN + 1];		/* 1 */
 	int		namlen;			/* 2 */
 	unsigned int	bsize;			/* 3 */
 	struct nfs3_fh	root;			/* 4 */
-- 
cgit v1.2.3


From f18289931d705f9c4634b361341a1677bea97aca Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:12:51 -0400
Subject: NFS: Add a new NFS debugging flag just for mount processing

Note to self: fix up /usr/sbin/rpcdebug too

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7deb5b0347f7..04f659f1e560 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -553,6 +553,7 @@ extern void * nfs_root_data(void);
 #define NFSDBG_ROOT		0x0080
 #define NFSDBG_CALLBACK		0x0100
 #define NFSDBG_CLIENT		0x0200
+#define NFSDBG_MOUNT		0x0400
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From cce63cd6374e6f1b4ea897ece1454feb13993d7c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:13:12 -0400
Subject: SUNRPC: Rename rpcb_getport_external routine

In preparation for handling NFS mount option parsing in the kernel,
rename rpcb_getport_external as rpcb_get_port_sync, and make it available
always (instead of only when CONFIG_ROOT_NFS is enabled).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 097984b03857..b28d919c7758 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -120,8 +120,10 @@ struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+
 int		rpcb_register(u32, u32, int, unsigned short, int *);
 void		rpcb_getport(struct rpc_task *);
+int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
 
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
 
@@ -141,10 +143,5 @@ void		rpc_force_rebind(struct rpc_clnt *);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 
-/*
- * Helper function for NFSroot support
- */
-int		rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int);
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v1.2.3


From 45160d6275814e0c86206e6981f0b92c61a50a21 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:13:17 -0400
Subject: SUNRPC: Rename rpcb_getport to be consistent with new
 rpcb_getport_sync name

Clean up, for consistency.  Rename rpcb_getport as rpcb_getport_async, to
match the naming scheme of rpcb_getport_sync.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b28d919c7758..c1b37972b0d5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -122,8 +122,8 @@ void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 
 int		rpcb_register(u32, u32, int, unsigned short, int *);
-void		rpcb_getport(struct rpc_task *);
 int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
+void		rpcb_getport_async(struct rpc_task *);
 
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
 
-- 
cgit v1.2.3


From 3ea97309e6b18bce200211b3f9188e8023321adc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:13:27 -0400
Subject: NFS: Remake nfsroot_mount as a permanent part of NFS client

In preparation for supporting NFSv2 and NFSv3 mount option handling in the
kernel NFS client, convert mount_clnt.c to be a permanent part of the NFS
client, instead of built only when CONFIG_ROOT_NFS is enabled.

In addition, we also replace the "struct sockaddr_in *" argument with
something more generic, to help support IPv6 at some later point.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 04f659f1e560..c098ae194f79 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -494,10 +494,9 @@ static inline void nfs3_forget_cached_acls(struct inode *inode)
 
 /*
  * linux/fs/mount_clnt.c
- * (Used only by nfsroot module)
  */
-extern int  nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *,
-		int, int);
+extern int  nfs_mount(struct sockaddr *, size_t, char *, char *,
+		      int, int, struct nfs_fh *);
 
 /*
  * inline functions
-- 
cgit v1.2.3


From 8007122520f0a3599bdc4df47358a5d83b2574aa Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 1 Jul 2007 12:13:59 -0400
Subject: NFS: Add support for mounting NFSv4 file systems with string options

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4_mount.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
index 26b4c83f831d..d8d7480e5a47 100644
--- a/include/linux/nfs4_mount.h
+++ b/include/linux/nfs4_mount.h
@@ -65,6 +65,6 @@ struct nfs4_mount_data {
 #define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
 #define NFS4_MOUNT_NOAC		0x0020	/* 1 */
 #define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
-#define NFS4_MOUNT_FLAGMASK	0xFFFF
+#define NFS4_MOUNT_FLAGMASK	0x1033
 
 #endif
-- 
cgit v1.2.3


From 75180df2ed467866ada839fe73cf7cc7d75c0a22 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 16 May 2007 16:53:28 -0400
Subject: NFS: Add the mount option "nosharecache"

Prior to David Howells' mount changes in 2.6.18, users who mounted
different directories which happened to be from the same filesystem on the
server would get different super blocks, and hence could choose different
mount options. As long as there were no hard linked files that crossed from
one subtree to another, this was quite safe.
Post the changes, if the two directories are on the same filesystem (have
the same 'fsid'), they will share the same super block, and hence the same
mount options.

Add a flag to allow users to elect not to share the NFS super block with
another mount point, even if the fsids are the same. This will allow
users to set different mount options for the two different super blocks, as
was previously possible. It is still up to the user to ensure that there
are no cache coherency issues when doing this, however the default
behaviour will be to share super blocks whenever two paths result in
the same fsid.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4_mount.h | 3 ++-
 include/linux/nfs_mount.h  | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
index d8d7480e5a47..a0dcf6655657 100644
--- a/include/linux/nfs4_mount.h
+++ b/include/linux/nfs4_mount.h
@@ -65,6 +65,7 @@ struct nfs4_mount_data {
 #define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
 #define NFS4_MOUNT_NOAC		0x0020	/* 1 */
 #define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
-#define NFS4_MOUNT_FLAGMASK	0x1033
+#define NFS4_MOUNT_UNSHARED	0x8000	/* 1 */
+#define NFS4_MOUNT_FLAGMASK	0x9033
 
 #endif
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 0b82a17c705b..a3ade89a64d2 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -62,6 +62,7 @@ struct nfs_mount_data {
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
 #define NFS_MOUNT_NORDIRPLUS	0x4000	/* 5 */
+#define NFS_MOUNT_UNSHARED	0x8000	/* 5 */
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 
 #endif
-- 
cgit v1.2.3


From 96802a095171f5b35cf0e1e0d4be943e6696a253 Mon Sep 17 00:00:00 2001
From: Frank van Maarseveen <frankvm@frankvm.com>
Date: Sun, 8 Jul 2007 13:08:54 +0200
Subject: SUNRPC: cleanup transport creation argument passing

Cleanup argument passing to functions for creating an RPC transport.

Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 34f7590506fa..ea828b09e4ad 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -17,6 +17,8 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
 
+#ifdef __KERNEL__
+
 extern unsigned int xprt_udp_slot_table_entries;
 extern unsigned int xprt_tcp_slot_table_entries;
 
@@ -194,7 +196,12 @@ struct rpc_xprt {
 	char *			address_strings[RPC_DISPLAY_MAX];
 };
 
-#ifdef __KERNEL__
+struct rpc_xprtsock_create {
+	int			proto;		/* IPPROTO_UDP or IPPROTO_TCP */
+	struct sockaddr *	dstaddr;	/* remote peer address */
+	size_t			addrlen;
+	struct rpc_timeout *	timeout;	/* optional timeout parameters */
+};
 
 /*
  * Transport operations used by ULPs
@@ -204,7 +211,7 @@ void			xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
 /*
  * Generic internal transport functions
  */
-struct rpc_xprt *	xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms);
+struct rpc_xprt *	xprt_create_transport(struct rpc_xprtsock_create *args);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_task *task);
@@ -242,8 +249,8 @@ void			xprt_disconnect(struct rpc_xprt *xprt);
 /*
  * Socket transport setup operations
  */
-struct rpc_xprt *	xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
-struct rpc_xprt *	xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
+struct rpc_xprt *	xs_setup_udp(struct rpc_xprtsock_create *args);
+struct rpc_xprt *	xs_setup_tcp(struct rpc_xprtsock_create *args);
 int			init_socket_xprt(void);
 void			cleanup_socket_xprt(void);
 
-- 
cgit v1.2.3


From a97476926ec061f90b77da478620ea6dc71a3237 Mon Sep 17 00:00:00 2001
From: Frank van Maarseveen <frankvm@frankvm.com>
Date: Mon, 9 Jul 2007 22:21:39 +0200
Subject: SUNRPC server: record the destination address of a request

Save the destination address of an incoming request over TCP, as is
already done for UDP. It is necessary later for callbacks by the server.

Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svcsock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index e21dd93ac4b7..a53e0fa855d2 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -59,6 +59,7 @@ struct svc_sock {
 	/* cache of various info for TCP sockets */
 	void			*sk_info_authunix;
 
+	struct sockaddr_storage	sk_local;	/* local address */
 	struct sockaddr_storage	sk_remote;	/* remote peer's address */
 	int			sk_remotelen;	/* length of address */
 };
-- 
cgit v1.2.3


From d3bc9a1deb8964d774af8535814cb91bf8f6def0 Mon Sep 17 00:00:00 2001
From: Frank van Maarseveen <frankvm@frankvm.com>
Date: Mon, 9 Jul 2007 22:23:35 +0200
Subject: SUNRPC client: add interface for binding to a local address

In addition to binding to a local privileged port the NFS client should
allow binding to a specific local address. This is used by the server
for callbacks. The patch adds the necessary interface.

Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 include/linux/sunrpc/xprt.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c1b37972b0d5..c0d9d14983b3 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -98,6 +98,7 @@ struct rpc_create_args {
 	int			protocol;
 	struct sockaddr		*address;
 	size_t			addrsize;
+	struct sockaddr		*saddress;
 	struct rpc_timeout	*timeout;
 	char			*servername;
 	struct rpc_program	*program;
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ea828b09e4ad..d11cedd14f0f 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -198,6 +198,7 @@ struct rpc_xprt {
 
 struct rpc_xprtsock_create {
 	int			proto;		/* IPPROTO_UDP or IPPROTO_TCP */
+	struct sockaddr *	srcaddr;	/* optional local address */
 	struct sockaddr *	dstaddr;	/* remote peer address */
 	size_t			addrlen;
 	struct rpc_timeout *	timeout;	/* optional timeout parameters */
-- 
cgit v1.2.3


From c98451bdb2f3e6d6cc1e03adad641e9497512b49 Mon Sep 17 00:00:00 2001
From: Frank van Maarseveen <frankvm@frankvm.com>
Date: Mon, 9 Jul 2007 22:25:29 +0200
Subject: NLM: fix source address of callback to client

Use the destination address of the original NLM request as the
source address in callbacks to the client.

Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/lockd/lockd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 05707e2fccae..e2d1ce36b367 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -39,6 +39,7 @@
 struct nlm_host {
 	struct hlist_node	h_hash;		/* doubly linked list */
 	struct sockaddr_in	h_addr;		/* peer address */
+	struct sockaddr_in	h_saddr;	/* our address (optional) */
 	struct rpc_clnt	*	h_rpcclnt;	/* RPC client to talk to peer */
 	char *			h_name;		/* remote hostname */
 	u32			h_version;	/* interface version */
-- 
cgit v1.2.3


From e06e7c615877026544ad7f8b309d1a3706410383 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 10 Jun 2007 17:22:39 -0700
Subject: [IPV4]: The scheduled removal of multipath cached routing support.

With help from Chris Wedgwood.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      |  1 -
 include/linux/ip_mp_alg.h | 22 ----------------------
 include/linux/rtnetlink.h |  2 +-
 3 files changed, 1 insertion(+), 24 deletions(-)
 delete mode 100644 include/linux/ip_mp_alg.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index afae306b177c..d94451682761 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -91,7 +91,6 @@ header-y += in6.h
 header-y += in_route.h
 header-y += ioctl.h
 header-y += ipmi_msgdefs.h
-header-y += ip_mp_alg.h
 header-y += ipsec.h
 header-y += ipx.h
 header-y += irda.h
diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h
deleted file mode 100644
index e234e2008f5d..000000000000
--- a/include/linux/ip_mp_alg.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#ifndef _LINUX_IP_MP_ALG_H
-#define _LINUX_IP_MP_ALG_H
-
-enum ip_mp_alg {
-	IP_MP_ALG_NONE,
-	IP_MP_ALG_RR,
-	IP_MP_ALG_DRR,
-	IP_MP_ALG_RANDOM,
-	IP_MP_ALG_WRANDOM,
-	__IP_MP_ALG_MAX
-};
-
-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
-
-#endif /* _LINUX_IP_MP_ALG_H */
-
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fae30af91f3..612785848532 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -261,7 +261,7 @@ enum rtattr_type_t
 	RTA_FLOW,
 	RTA_CACHEINFO,
 	RTA_SESSION,
-	RTA_MP_ALGO,
+	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
 	__RTA_MAX
 };
-- 
cgit v1.2.3


From 8c7b7faaa630fef7f68d8728cee1cce398cc9697 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2007 22:08:12 -0700
Subject: [NET]: Kill eth_copy_and_sum().

It hasn't "summed" anything in over 7 years, and it's
just a straight memcpy ala skb_copy_to_linear_data()
so just get rid of it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 071c67abed86..f48eb89efd0f 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -40,12 +40,6 @@ extern int		eth_header_cache(struct neighbour *neigh,
 					 struct hh_cache *hh);
 
 extern struct net_device *alloc_etherdev(int sizeof_priv);
-static inline void eth_copy_and_sum (struct sk_buff *dest, 
-				     const unsigned char *src, 
-				     int len, int base)
-{
-	memcpy (dest->data, src, len);
-}
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
-- 
cgit v1.2.3


From 6472ce6096bf27d85a1f2580964a36f290bd60a9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:03:21 -0700
Subject: [NET]: Mark struct net_device * argument to netdev_priv const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a70f553b28f..94cc77cd3aa3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -546,7 +546,7 @@ struct net_device
 #define	NETDEV_ALIGN		32
 #define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
 
-static inline void *netdev_priv(struct net_device *dev)
+static inline void *netdev_priv(const struct net_device *dev)
 {
 	return (char *)dev + ((sizeof(struct net_device)
 					+ NETDEV_ALIGN_CONST)
-- 
cgit v1.2.3


From 38f7b870d4a6a5d3ec21557e849620cb7d032965 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:03:51 -0700
Subject: [RTNETLINK]: Link creation API

Add rtnetlink API for creating, changing and deleting software devices.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   | 13 +++++++++++++
 include/linux/netdevice.h |  3 +++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 604c2434f71c..3144babd2357 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -76,6 +76,8 @@ enum
 #define IFLA_WEIGHT IFLA_WEIGHT
 	IFLA_OPERSTATE,
 	IFLA_LINKMODE,
+	IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
 	__IFLA_MAX
 };
 
@@ -140,4 +142,15 @@ struct ifla_cacheinfo
 	__u32	retrans_time;
 };
 
+enum
+{
+	IFLA_INFO_UNSPEC,
+	IFLA_INFO_KIND,
+	IFLA_INFO_DATA,
+	IFLA_INFO_XSTATS,
+	__IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94cc77cd3aa3..e7913ee5581c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -540,6 +540,9 @@ struct net_device
 	struct device		dev;
 	/* space for optional statistics and wireless sysfs groups */
 	struct attribute_group  *sysfs_groups[3];
+
+	/* rtnetlink link ops */
+	const struct rtnl_link_ops *rtnl_link_ops;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
-- 
cgit v1.2.3


From 734423cf38021966a5d3bd5f5c6aaecaf32fb4ac Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:07 -0700
Subject: [VLAN]: Use 32 bit value for skb->priority mapping

skb->priority has only 32 bits and even VLAN uses 32 bit values in its API.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 81e9bc93569b..aeddb49193f9 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -99,7 +99,7 @@ static inline void vlan_group_set_device(struct vlan_group *vg, int vlan_id,
 }
 
 struct vlan_priority_tci_mapping {
-	unsigned long priority;
+	u32 priority;
 	unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
 				  * at provisioning time.
 				  * ((skb->priority << 13) & 0xE000)
@@ -112,7 +112,7 @@ struct vlan_dev_info {
 	/** This will be the mapping that correlates skb->priority to
 	 * 3 bits of VLAN QOS tags...
 	 */
-	unsigned long ingress_priority_map[8];
+	u32 ingress_priority_map[8];
 	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
 
 	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
-- 
cgit v1.2.3


From b020cb488586f982f40eb257a32e92a4de710d65 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:22 -0700
Subject: [VLAN]: Keep track of number of QoS mappings

Keep track of the number of configured ingress/egress QoS mappings to
avoid iteration while calculating the netlink attribute size.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index aeddb49193f9..b46d4225f74e 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -112,7 +112,10 @@ struct vlan_dev_info {
 	/** This will be the mapping that correlates skb->priority to
 	 * 3 bits of VLAN QOS tags...
 	 */
+	unsigned int nr_ingress_mappings;
 	u32 ingress_priority_map[8];
+
+	unsigned int nr_egress_mappings;
 	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
 
 	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
-- 
cgit v1.2.3


From a4bf3af4ac46802436d352ef409cee4fe80445b3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:37 -0700
Subject: [VLAN]: Introduce symbolic constants for flag values

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index b46d4225f74e..c7912876a210 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -398,6 +398,10 @@ enum vlan_ioctl_cmds {
 	GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */
 };
 
+enum vlan_flags {
+	VLAN_FLAG_REORDER_HDR	= 0x1,
+};
+
 enum vlan_name_types {
 	VLAN_NAME_TYPE_PLUS_VID, /* Name will look like:  vlan0005 */
 	VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like:  eth1.0005 */
-- 
cgit v1.2.3


From 07b5b17e157b7018d0ca40ca0d1581a23096fb45 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Jun 2007 12:07:54 -0700
Subject: [VLAN]: Use rtnl_link API

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 3144babd2357..422084d18ce1 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -153,4 +153,38 @@ enum
 
 #define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
 
+/* VLAN section */
+
+enum
+{
+	IFLA_VLAN_UNSPEC,
+	IFLA_VLAN_ID,
+	IFLA_VLAN_FLAGS,
+	IFLA_VLAN_EGRESS_QOS,
+	IFLA_VLAN_INGRESS_QOS,
+	__IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+	__u32	flags;
+	__u32	mask;
+};
+
+enum
+{
+	IFLA_VLAN_QOS_UNSPEC,
+	IFLA_VLAN_QOS_MAPPING,
+	__IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping
+{
+	__u32 from;
+	__u32 to;
+};
+
 #endif /* _LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From f1c91da44728fba24927e44056a56e507c11cf7b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 16 Jun 2007 12:38:51 -0300
Subject: [KTIME]: Introduce ktime_us_delta

This provides a reusable time difference function which returns the difference in
microseconds, as often used in the DCCP code.

Committer note: renamed ktime_delta to ktime_us_delta and put it in ktime.h.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 include/linux/ktime.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2b139f66027f..923665958f90 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -279,6 +279,11 @@ static inline s64 ktime_to_us(const ktime_t kt)
 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
 }
 
+static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
+{
+       return ktime_to_us(ktime_sub(later, earlier));
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
-- 
cgit v1.2.3


From 1e180f726a58089d15637b5495fecbad8c50c833 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 16 Jun 2007 12:39:38 -0300
Subject: [KTIME]: Introduce ktime_add_us

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 include/linux/ktime.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 923665958f90..dae7143644fe 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -284,6 +284,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
        return ktime_to_us(ktime_sub(later, earlier));
 }
 
+static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
+{
+	return ktime_add_ns(kt, usec * 1000);
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
-- 
cgit v1.2.3


From 334a8132d9950f769f390f0f35c233d099688e7a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 25 Jun 2007 04:35:20 -0700
Subject: [SKBUFF]: Keep track of writable header len of headerless clones

Currently NAT (and others) that want to modify cloned skbs copy them,
even if in the vast majority of cases it's not necessary because the
skb is a clone made by TCP and the portion NAT wants to modify is
actually writable because TCP releases the header reference before
cloning.

The problem is that there is no clean way for NAT to find out how
long the writable header area is, so this patch introduces skb->hdr_len
to hold this length. When a headerless skb is cloned skb->hdr_len
is set to the current headroom, for regular clones it is copied from
the original. A new function skb_clone_writable(skb, len) returns
whether the skb is writable up to len bytes from skb->data. To avoid
enlarging the skb the mac_len field is reduced to 16 bit and the
new hdr_len field is put in the remaining 16 bit.

I've done a few rough benchmarks of NAT (not with this exact patch,
but a very similar one). As expected it saves huge amounts of system
time in case of sendfile, bringing it down to basically the same
amount as without NAT, with sendmsg it only helps on loopback,
probably because of the large MTU.

Transmit a 1GB file using sendfile/sendmsg over eth0/lo with and
without NAT:

- sendfile eth0, no NAT:	sys     0m0.388s
- sendfile eth0, NAT:		sys     0m1.835s
- sendfile eth0, NAT + patch:	sys     0m0.370s	(~ -80%)

- sendfile lo, no NAT:		sys     0m0.258s
- sendfile lo, NAT:		sys     0m2.609s
- sendfile lo, NAT + patch:	sys     0m0.260s	(~ -90%)

- sendmsg eth0, no NAT:		sys     0m2.508s
- sendmsg eth0, NAT:		sys     0m2.539s
- sendmsg eth0, NAT + patch:	sys     0m2.445s	(no change)

- sendmsg lo, no NAT:		sys	0m2.151s
- sendmsg lo, NAT:		sys     0m3.557s
- sendmsg lo, NAT + patch:	sys     0m2.159s	(~ -40%)

I expect other users can see a similar performance improvement,
packet mangling iptables targets, ipip and ip_gre come to mind ..

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b2f7d0010..881fe80f01d0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -147,8 +147,8 @@ struct skb_shared_info {
 
 /* We divide dataref into two halves.  The higher 16 bits hold references
  * to the payload part of skb->data.  The lower 16 bits hold references to
- * the entire skb->data.  It is up to the users of the skb to agree on
- * where the payload starts.
+ * the entire skb->data.  A clone of a headerless skb holds the length of
+ * the header in skb->hdr_len.
  *
  * All users must obey the rule that the skb->data reference count must be
  * greater than or equal to the payload reference count.
@@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@len: Length of actual data
  *	@data_len: Data length
  *	@mac_len: Length of link layer header
+ *	@hdr_len: writable header length of cloned skb
  *	@csum: Checksum (must include start/offset pair)
  *	@csum_start: Offset from skb->head where checksumming should start
  *	@csum_offset: Offset from csum_start where checksum should be stored
@@ -260,8 +261,9 @@ struct sk_buff {
 	char			cb[48];
 
 	unsigned int		len,
-				data_len,
-				mac_len;
+				data_len;
+	__u16			mac_len,
+				hdr_len;
 	union {
 		__wsum		csum;
 		struct {
@@ -1321,6 +1323,20 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
 	return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
 }
 
+/**
+ *	skb_clone_writable - is the header of a clone writable
+ *	@skb: buffer to check
+ *	@len: length up to which to write
+ *
+ *	Returns true if modifying the header part of the cloned buffer
+ *	does not requires the data to be copied.
+ */
+static inline int skb_clone_writable(struct sk_buff *skb, int len)
+{
+	return !skb_header_cloned(skb) &&
+	       skb_headroom(skb) + len <= skb->hdr_len;
+}
+
 /**
  *	skb_cow - copy header of skb when it is required
  *	@skb: buffer to cow
-- 
cgit v1.2.3


From afdc3238ec948531205f5c5f77d2de7bae519c71 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 25 Jun 2007 14:30:16 -0700
Subject: [RTNETLINK]: Add nested compat attribute

Add a nested compat attribute type that can be used to convert
attributes that contain a structure to nested attributes in a
backwards compatible way.

The attribute looks like this:

struct {
        [ compat contents ]
        struct rtattr {
                .rta_len        = total size,
                .rta_type       = type,
        } rta;
        struct old_structure struct;

        [ nested top-level attribute ]
        struct rtattr {
                .rta_len        = nest size,
                .rta_type       = type,
        } nest_attr;

        [ optional 0 .. n nested attributes ]
        struct rtattr {
                .rta_len        = private attribute len,
                .rta_type       = private attribute type,
        } nested_attr;
        struct nested_data data;
};

Since both userspace and kernel deal correctly with attributes that are
larger than expected, old versions will just parse the compat part and
ignore the rest.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 612785848532..6731e7f4cc0f 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -570,6 +570,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 }
 
 extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
+extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+				      struct rtattr *rta, void **data, int len);
 
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
@@ -638,6 +640,18 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 ({	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
 	(skb)->len; })
 
+#define RTA_NEST_COMPAT(skb, type, attrlen, data) \
+({	struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
+	RTA_PUT(skb, type, attrlen, data); \
+	RTA_NEST(skb, type); \
+	__start; })
+
+#define RTA_NEST_COMPAT_END(skb, start) \
+({	struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \
+	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
+	RTA_NEST_END(skb, __nest); \
+	(skb)->len; })
+
 #define RTA_NEST_CANCEL(skb, start) \
 ({	if (start) \
 		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
-- 
cgit v1.2.3


From 2371baa4bdab3268b32009926f75e7a5d3a41506 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 26 Jun 2007 03:23:44 -0700
Subject: [RTNETLINK]: Fix rtnetlink compat attribute patch

Sent the wrong patch previously.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6731e7f4cc0f..c91476ce314a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -570,12 +570,16 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 }
 
 extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
-extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
-				      struct rtattr *rta, void **data, int len);
+extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
+				        struct rtattr *rta, int len);
 
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
+#define rtattr_parse_nested_compat(tb, max, rta, data, len) \
+({	data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
+	__rtattr_parse_nested_compat(tb, max, rta, len); })
+
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
 extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
-- 
cgit v1.2.3


From 59fbb3a61e02deaeaa4fb50792217921f3002d64 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Tue, 26 Jun 2007 23:56:32 -0700
Subject: [IPV6] MIP6: Loadable module support for MIPv6.

This patch makes MIPv6 loadable module named "mip6".

Here is a modprobe.conf(5) example to load it automatically
when user application uses XFRM state for MIPv6:

alias xfrm-type-10-43 mip6
alias xfrm-type-10-60 mip6

Some MIPv6 features are not covered by this module; however, other
features such as IPsec or IPv6 should not be affected, either with
or without the patch.
We may discuss XFRM, MH (RAW socket) and ancillary data/sockopt
separately for future work.

Loadable features:
* MH receiving check (to send ICMP error back)
* RO header parsing and building (i.e. RH2 and HAO in DSTOPTS)
* XFRM policy/state database handling for RO

These are NOT covered as loadable:
* Home Address flags and its rule on source address selection
* XFRM sub policy (depends on its own kernel option)
* XFRM functions to receive RO as IPv6 extension header
* MH sending/receiving through raw socket if user application
  opens it (since raw socket allows to do so)
* RH2 sending as ancillary data
* RH2 operation with setsockopt(2)

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 648bd1f0912d..213b63be3c8f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -247,7 +247,7 @@ struct inet6_skb_parm {
 	__u16			lastopt;
 	__u32			nhoff;
 	__u16			flags;
-#ifdef CONFIG_IPV6_MIP6
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16			dsthao;
 #endif
 
-- 
cgit v1.2.3


From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 27 Jun 2007 00:47:37 -0700
Subject: [NET]: IPV6 checksum offloading in network devices

The existing model for checksum offload does not correctly handle
devices that can offload IPV4 and IPV6 only. The NETIF_F_HW_CSUM flag
implies device can do any arbitrary protocol.

This patch:
 * adds NETIF_F_IPV6_CSUM for those devices
 * fixes bnx2 and tg3 devices that need it
 * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO)
 * fixes assumptions about NETIF_F_ALL_CSUM in nat
 * adjusts bridge union of checksumming computation

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7913ee5581c..7a8f22fb4eee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,9 +314,10 @@ struct net_device
 	/* Net device features */
 	unsigned long		features;
 #define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
 #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
 #define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
+#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
 #define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
 #define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
 #define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
@@ -338,8 +339,11 @@ struct net_device
 	/* List of features with software fallbacks. */
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
 
+
 #define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-#define NETIF_F_ALL_CSUM	(NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
+#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
+#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
+#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
 
 	struct net_device	*next_sched;
 
-- 
cgit v1.2.3


From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:26:19 -0700
Subject: [NET]: dev: introduce generic net_device address lists

Introduce struct dev_addr_list and list maintenance functions
based on dev_mc_list and the related functions. This will be
used by follow-up patches for both multicast and secondary
unicast addresses.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7a8f22fb4eee..aa389c77aa3e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -177,6 +177,14 @@ struct netif_rx_stats
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
 
+struct dev_addr_list
+{
+	struct dev_addr_list	*next;
+	u8			da_addr[MAX_ADDR_LEN];
+	u8			da_addrlen;
+	int			da_users;
+	int			da_gusers;
+};
 
 /*
  *	We tag multicasts with these structures.
@@ -1008,6 +1016,9 @@ extern void		dev_mc_upload(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
+extern int 		__dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all);
+extern int		__dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly);
+extern void		__dev_addr_discard(struct dev_addr_list **list);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
-- 
cgit v1.2.3


From 3fba5a8b1e3df2384b90493538161e83cf15dd5f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:26:58 -0700
Subject: [NET]: dev_mcast: switch to generic net_device address lists

Use generic net_device address lists for multicast list handling.
Some defines are used to keep drivers working.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aa389c77aa3e..9e114e77e54d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -189,15 +189,12 @@ struct dev_addr_list
 /*
  *	We tag multicasts with these structures.
  */
- 
-struct dev_mc_list
-{	
-	struct dev_mc_list	*next;
-	__u8			dmi_addr[MAX_ADDR_LEN];
-	unsigned char		dmi_addrlen;
-	int			dmi_users;
-	int			dmi_gusers;
-};
+
+#define dev_mc_list	dev_addr_list
+#define dmi_addr	da_addr
+#define dmi_addrlen	da_addrlen
+#define dmi_users	da_users
+#define dmi_gusers	da_gusers
 
 struct hh_cache
 {
@@ -400,7 +397,7 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	struct dev_mc_list	*mc_list;	/* Multicast mac addresses	*/
+	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
 	int			mc_count;	/* Number of installed mcasts	*/
 	int			promiscuity;
 	int			allmulti;
-- 
cgit v1.2.3


From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Jun 2007 01:28:10 -0700
Subject: [NET]: dev: secondary unicast address support

Add support for configuring secondary unicast addresses on network
devices. To support this, devices capable of filtering multiple
unicast addresses need to change their set_multicast_list function
to configure unicast filters as well and assign it to dev->set_rx_mode
instead of dev->set_multicast_list. Other devices are put into promiscuous
mode when secondary unicast addresses are present.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e114e77e54d..2c0cc19edfb2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -397,6 +397,9 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
+	struct dev_addr_list	*uc_list;	/* Secondary unicast mac addresses */
+	int			uc_count;	/* Number of installed ucasts	*/
+	int			uc_promisc;
 	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
 	int			mc_count;	/* Number of installed mcasts	*/
 	int			promiscuity;
@@ -502,6 +505,8 @@ struct net_device
 						void *saddr,
 						unsigned len);
 	int			(*rebuild_header)(struct sk_buff *skb);
+#define HAVE_SET_RX_MODE
+	void			(*set_rx_mode)(struct net_device *dev);
 #define HAVE_MULTICAST			 
 	void			(*set_multicast_list)(struct net_device *dev);
 #define HAVE_SET_MAC_ADDR  		 
@@ -1008,8 +1013,11 @@ extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 				       void (*setup)(struct net_device *));
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
-/* Functions used for multicast support */
-extern void		dev_mc_upload(struct net_device *dev);
+/* Functions used for secondary unicast and multicast support */
+extern void		dev_set_rx_mode(struct net_device *dev);
+extern void		__dev_set_rx_mode(struct net_device *dev);
+extern int		dev_unicast_delete(struct net_device *dev, void *addr, int alen);
+extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
-- 
cgit v1.2.3


From 342f0234c71b40da785dd6a7ce1dd481ecbfdb81 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:37:46 -0700
Subject: [UDP]: Introduce UDP encapsulation type for L2TP

This patch adds a new UDP_ENCAP_L2TPINUDP encapsulation type for UDP
sockets. When a UDP socket's encap_type is UDP_ENCAP_L2TPINUDP, the
skb is delivered to a function pointed to by the udp_sock's
encap_rcv funcptr. If the skb isn't wanted by L2TP, it returns >0, which
causes it to be passed through to UDP.

Include padding to put the new encap_rcv field on a 4-byte boundary.

Previously, the only user of UDP encap sockets was ESP, so when
CONFIG_XFRM was not defined, some of the encap code was compiled
out. This patch changes that. As a result, udp_encap_rcv() will
now do a little more work when CONFIG_XFRM is not defined.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 6de445c31a64..8ec703f462da 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -42,6 +42,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
 #define UDP_ENCAP_ESPINUDP	2 /* draft-ietf-ipsec-udp-encaps-06 */
+#define UDP_ENCAP_L2TPINUDP	3 /* rfc2661 */
 
 #ifdef __KERNEL__
 #include <linux/types.h>
@@ -70,6 +71,11 @@ struct udp_sock {
 #define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
 #define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
 	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
+	__u8		 unused[3];
+	/*
+	 * For encapsulation sockets.
+	 */
+	int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
-- 
cgit v1.2.3


From cf14a4d06742d59ecb2d837a3f53bb24d1ff9acb Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 27 Jun 2007 15:43:43 -0700
Subject: [L2TP]: Changes to existing ppp and socket kernel headers for L2TP

Add struct sockaddr_pppol2tp to carry L2TP-specific address
information for the PPPoX (PPPoL2TP) socket. Unfortunately we can't
use the union inside struct sockaddr_pppox because the L2TP-specific
data is larger than the current size of the union and we must preserve
the size of struct sockaddr_pppox for binary compatibility.

Also add a PPPIOCGL2TPSTATS ioctl to allow userspace to obtain
L2TP counters and state from the kernel.

Add new if_pppol2tp.h header.

[ Modified to use aligned_u64 in statistics structure -DaveM ]

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild        |  1 +
 include/linux/if_ppp.h      | 16 +++++++++++
 include/linux/if_pppol2tp.h | 69 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/if_pppox.h    | 16 +++++++++--
 include/linux/socket.h      |  1 +
 5 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/if_pppol2tp.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index d94451682761..127d2d192b5a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -225,6 +225,7 @@ unifdef-y += if_fddi.h
 unifdef-y += if_frad.h
 unifdef-y += if_ltalk.h
 unifdef-y += if_link.h
+unifdef-y += if_pppol2tp.h
 unifdef-y += if_pppox.h
 unifdef-y += if_shaper.h
 unifdef-y += if_tr.h
diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h
index 768372f07caa..0f2f70d4e48c 100644
--- a/include/linux/if_ppp.h
+++ b/include/linux/if_ppp.h
@@ -110,6 +110,21 @@ struct ifpppcstatsreq {
 	struct ppp_comp_stats stats;
 };
 
+/* For PPPIOCGL2TPSTATS */
+struct pppol2tp_ioc_stats {
+	__u16		tunnel_id;	/* redundant */
+	__u16		session_id;	/* if zero, get tunnel stats */
+	__u32		using_ipsec:1;	/* valid only for session_id == 0 */
+	aligned_u64	tx_packets;
+	aligned_u64	tx_bytes;
+	aligned_u64	tx_errors;
+	aligned_u64	rx_packets;
+	aligned_u64	rx_bytes;
+	aligned_u64	rx_seq_discards;
+	aligned_u64	rx_oos_packets;
+	aligned_u64	rx_errors;
+};
+
 #define ifr__name       b.ifr_ifrn.ifrn_name
 #define stats_ptr       b.ifr_ifru.ifru_data
 
@@ -146,6 +161,7 @@ struct ifpppcstatsreq {
 #define PPPIOCDISCONN	_IO('t', 57)		/* disconnect channel */
 #define PPPIOCATTCHAN	_IOW('t', 56, int)	/* attach to ppp channel */
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
+#define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
new file mode 100644
index 000000000000..516203b6fdeb
--- /dev/null
+++ b/include/linux/if_pppol2tp.h
@@ -0,0 +1,69 @@
+/***************************************************************************
+ * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661)
+ *
+ * This file supplies definitions required by the PPP over L2TP driver
+ * (pppol2tp.c).  All version information wrt this file is located in pppol2tp.c
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __LINUX_IF_PPPOL2TP_H
+#define __LINUX_IF_PPPOL2TP_H
+
+#include <asm/types.h>
+
+#ifdef __KERNEL__
+#include <linux/in.h>
+#endif
+
+/* Structure used to connect() the socket to a particular tunnel UDP
+ * socket.
+ */
+struct pppol2tp_addr
+{
+	pid_t	pid;			/* pid that owns the fd.
+					 * 0 => current */
+	int	fd;			/* FD of UDP socket to use */
+
+	struct sockaddr_in addr;	/* IP address and port to send to */
+
+	__be16 s_tunnel, s_session;	/* For matching incoming packets */
+	__be16 d_tunnel, d_session;	/* For sending outgoing packets */
+};
+
+/* Socket options:
+ * DEBUG	- bitmask of debug message categories
+ * SENDSEQ	- 0 => don't send packets with sequence numbers
+ *		  1 => send packets with sequence numbers
+ * RECVSEQ	- 0 => receive packet sequence numbers are optional
+ *		  1 => drop receive packets without sequence numbers
+ * LNSMODE	- 0 => act as LAC.
+ *		  1 => act as LNS.
+ * REORDERTO	- reorder timeout (in millisecs). If 0, don't try to reorder.
+ */
+enum {
+	PPPOL2TP_SO_DEBUG	= 1,
+	PPPOL2TP_SO_RECVSEQ	= 2,
+	PPPOL2TP_SO_SENDSEQ	= 3,
+	PPPOL2TP_SO_LNSMODE	= 4,
+	PPPOL2TP_SO_REORDERTO	= 5,
+};
+
+/* Debug message categories for the DEBUG socket option */
+enum {
+	PPPOL2TP_MSG_DEBUG	= (1 << 0),	/* verbose debug (if
+						 * compiled in) */
+	PPPOL2TP_MSG_CONTROL	= (1 << 1),	/* userspace - kernel
+						 * interface */
+	PPPOL2TP_MSG_SEQ	= (1 << 2),	/* sequence numbers */
+	PPPOL2TP_MSG_DATA	= (1 << 3),	/* data packets */
+};
+
+
+
+#endif
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 6f987be60fe2..25652545ba6e 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -27,6 +27,7 @@
 #include <asm/semaphore.h>
 #include <linux/ppp_channel.h>
 #endif /* __KERNEL__ */
+#include <linux/if_pppol2tp.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
@@ -50,8 +51,9 @@ struct pppoe_addr{
  * Protocols supported by AF_PPPOX 
  */ 
 #define PX_PROTO_OE    0 /* Currently just PPPoE */
-#define PX_MAX_PROTO   1	
- 
+#define PX_PROTO_OL2TP 1 /* Now L2TP also */
+#define PX_MAX_PROTO   2
+
 struct sockaddr_pppox { 
        sa_family_t     sa_family;            /* address family, AF_PPPOX */ 
        unsigned int    sa_protocol;          /* protocol identifier */ 
@@ -60,6 +62,16 @@ struct sockaddr_pppox {
        }sa_addr; 
 }__attribute__ ((packed)); 
 
+/* The use of the above union isn't viable because the size of this
+ * struct must stay fixed over time -- applications use sizeof(struct
+ * sockaddr_pppox) to fill it. We use a protocol specific sockaddr
+ * type instead.
+ */
+struct sockaddr_pppol2tp {
+	sa_family_t     sa_family;      /* address family, AF_PPPOX */
+	unsigned int    sa_protocol;    /* protocol identifier */
+	struct pppol2tp_addr pppol2tp;
+}__attribute__ ((packed));
 
 /*********************************************************************
  *
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6e7c9483a6a6..fe195c97a89d 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
 #define SOL_NETLINK	270
 #define SOL_TIPC	271
 #define SOL_RXRPC	272
+#define SOL_PPPOL2TP	273
 
 /* IPX options */
 #define IPX_TYPE	1
-- 
cgit v1.2.3


From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 6 Jul 2007 13:36:20 -0700
Subject: [CORE] Stack changes to add multiqueue hardware support API

Add the multiqueue hardware device support API to the core network
stack.  Allow drivers to allocate multiple queues and manage them at
the netdev level if they choose to do so.

Added a new field to sk_buff, namely queue_mapping, for drivers to
know which tx_ring to select based on OS classification of the flow.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  3 +-
 include/linux/netdevice.h   | 80 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/skbuff.h      | 25 ++++++++++++--
 3 files changed, 99 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89efd0f..6cdb97365e47 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void		eth_header_cache_update(struct hh_cache *hh, struct net_device *dev
 extern int		eth_header_cache(struct neighbour *neigh,
 					 struct hh_cache *hh);
 
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c0cc19edfb2..9817821729c4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+struct net_device_subqueue
+{
+	/* Give a control state for each queue.  This struct may contain
+	 * per-queue locks in the future.
+	 */
+	unsigned long   state;
+};
+
 /*
  *	Network device statistics. Akin to the 2.0 ether stats but
  *	with byte counters.
@@ -331,6 +339,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_GSO		2048	/* Enable software GSO. */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_MULTI_QUEUE	16384	/* Has multiple TX/RX queues */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -557,6 +566,10 @@ struct net_device
 
 	/* rtnetlink link ops */
 	const struct rtnl_link_ops *rtnl_link_ops;
+
+	/* The TX queue control structures */
+	unsigned int			egress_subqueue_count;
+	struct net_device_subqueue	egress_subqueue[0];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -565,9 +578,7 @@ struct net_device
 
 static inline void *netdev_priv(const struct net_device *dev)
 {
-	return (char *)dev + ((sizeof(struct net_device)
-					+ NETDEV_ALIGN_CONST)
-				& ~NETDEV_ALIGN_CONST);
+	return dev->priv;
 }
 
 #define SET_MODULE_OWNER(dev) do { } while (0)
@@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev)
 	return test_bit(__LINK_STATE_START, &dev->state);
 }
 
+/*
+ * Routines to manage the subqueues on a device.  We only need start
+ * stop, and a check if it's stopped.  All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+	if (netpoll_trap())
+		return;
+#endif
+	set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+					 u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	return test_bit(__LINK_STATE_XOFF,
+			&dev->egress_subqueue[queue_index].state);
+#else
+	return 0;
+#endif
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+	if (netpoll_trap())
+		return;
+#endif
+	if (test_and_clear_bit(__LINK_STATE_XOFF,
+			       &dev->egress_subqueue[queue_index].state))
+		__netif_schedule(dev);
+#endif
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	return (!!(NETIF_F_MULTI_QUEUE & dev->features));
+#else
+	return 0;
+#endif
+}
 
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
@@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev)
 extern void		ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
-				       void (*setup)(struct net_device *));
+extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+				       void (*setup)(struct net_device *),
+				       unsigned int queue_count);
+#define alloc_netdev(sizeof_priv, name, setup) \
+	alloc_netdev_mq(sizeof_priv, name, setup, 1)
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 /* Functions used for secondary unicast and multicast support */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 881fe80f01d0..2d6a14f5f2f1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@sk: Socket we are owned by
  *	@tstamp: Time we arrived
  *	@dev: Device we arrived on/are leaving by
- *	@iif: ifindex of device we arrived on
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
@@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
+ *	@iif: ifindex of device we arrived on
+ *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
  *	@dma_cookie: a cookie to one of several possible DMA operations
@@ -246,8 +247,6 @@ struct sk_buff {
 	struct sock		*sk;
 	ktime_t			tstamp;
 	struct net_device	*dev;
-	int			iif;
-	/* 4 byte hole on 64 bit*/
 
 	struct  dst_entry	*dst;
 	struct	sec_path	*sp;
@@ -290,12 +289,18 @@ struct sk_buff {
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
+
+	int			iif;
+	__u16			queue_mapping;
+
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
 	__u16			tc_verd;	/* traffic control verdict */
 #endif
 #endif
+	/* 2 byte hole */
+
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
 #endif
@@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb)
 { }
 #endif
 
+static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	skb->queue_mapping = queue_mapping;
+#endif
+}
+
+static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	to->queue_mapping = from->queue_mapping;
+#endif
+}
+
 static inline int skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
-- 
cgit v1.2.3


From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Thu, 28 Jun 2007 21:04:31 -0700
Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue

Add the new sch_rr qdisc for multiqueue network device support.  Allow
sch_prio and sch_rr to be compiled with or without multiqueue hardware
support.

sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS.  This
was done since sch_prio and sch_rr only differ in their dequeue
routine.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f35338507..268c51599eb8 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -101,6 +101,15 @@ struct tc_prio_qopt
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
 
+enum
+{
+	TCA_PRIO_UNSPEC,
+	TCA_PRIO_MQ,
+	__TCA_PRIO_MAX
+};
+
+#define TCA_PRIO_MAX    (__TCA_PRIO_MAX - 1)
+
 /* TBF section */
 
 struct tc_tbf_qopt
-- 
cgit v1.2.3


From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 30 Jun 2007 13:35:52 -0700
Subject: [NET]: Fix secondary unicast/multicast address count maintenance

When a reference to an existing address is increased or decreased without
hitting zero, the address count is incorrectly adjusted.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9817821729c4..8590d685d935 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1091,8 +1091,8 @@ extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern void		dev_mc_discard(struct net_device *dev);
-extern int 		__dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all);
-extern int		__dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly);
+extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
+extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
 extern void		__dev_addr_discard(struct dev_addr_list **list);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
-- 
cgit v1.2.3


From 8c644623fe7e41f59fe97cdf666cba3cb7ced7d8 Mon Sep 17 00:00:00 2001
From: Guido Guenther <agx@sigxcpu.org>
Date: Mon, 2 Jul 2007 22:50:25 -0700
Subject: [NET]: Allow group ownership of TUN/TAP devices.

Introduce a new ioctl TUNSETGROUP for group ownership setting of tap
devices. The user is now allowed to send packets if either his euid or
his egid matches the one specified via tunctl (via -u or -g
respectively). If both gid and uid are set via tunctl, both have to
match.

Signed-off-by: Guido Guenther <agx@sigxcpu.org>
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tun.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 88aef7b86ef4..42eb6945b93e 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -36,6 +36,7 @@ struct tun_struct {
 	unsigned long 		flags;
 	int			attached;
 	uid_t			owner;
+	gid_t			group;
 
 	wait_queue_head_t	read_wait;
 	struct sk_buff_head	readq;
@@ -78,6 +79,7 @@ struct tun_struct {
 #define TUNSETPERSIST _IOW('T', 203, int) 
 #define TUNSETOWNER   _IOW('T', 204, int)
 #define TUNSETLINK    _IOW('T', 205, int)
+#define TUNSETGROUP   _IOW('T', 206, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v1.2.3


From 89da1ecf5483e6aa29b456a15ad6d05a6797c5a5 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Mon, 2 Jul 2007 22:54:18 -0700
Subject: [IrDA]: Netlink layer.

First IrDA configuration netlink layer implementation.
Currently, we only support the set/get mode commands.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/irda.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irda.h b/include/linux/irda.h
index 945ba3110874..35911bd4dbe8 100644
--- a/include/linux/irda.h
+++ b/include/linux/irda.h
@@ -216,6 +216,33 @@ struct if_irda_req {
 #define ifr_dtr       ifr_ifru.ifru_line.dtr
 #define ifr_rts       ifr_ifru.ifru_line.rts
 
+
+/* IrDA netlink definitions */
+#define IRDA_NL_NAME "irda"
+#define IRDA_NL_VERSION 1
+
+enum irda_nl_commands {
+	IRDA_NL_CMD_UNSPEC,
+	IRDA_NL_CMD_SET_MODE,
+	IRDA_NL_CMD_GET_MODE,
+
+	__IRDA_NL_CMD_AFTER_LAST
+};
+#define IRDA_NL_CMD_MAX (__IRDA_NL_CMD_AFTER_LAST - 1)
+
+enum nl80211_attrs {
+	IRDA_NL_ATTR_UNSPEC,
+	IRDA_NL_ATTR_IFNAME,
+	IRDA_NL_ATTR_MODE,
+
+	__IRDA_NL_ATTR_AFTER_LAST
+};
+#define IRDA_NL_ATTR_MAX (__IRDA_NL_ATTR_AFTER_LAST - 1)
+
+/* IrDA modes */
+#define IRDA_MODE_PRIMARY   0x1
+#define IRDA_MODE_SECONDARY 0x2
+
 #endif /* KERNEL_IRDA_H */
 
 
-- 
cgit v1.2.3


From 411725280bd0058ebb83c0e32133b7a94902c3a6 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Mon, 2 Jul 2007 22:55:31 -0700
Subject: [IrDA]: Monitor mode.

Through the IrDA netlink set mode command, we switch to IrDA monitor
mode, where one IrLAP instance receives all the packets on the media,
without ever responding to them.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/irda.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irda.h b/include/linux/irda.h
index 35911bd4dbe8..8e3735714c1c 100644
--- a/include/linux/irda.h
+++ b/include/linux/irda.h
@@ -242,6 +242,7 @@ enum nl80211_attrs {
 /* IrDA modes */
 #define IRDA_MODE_PRIMARY   0x1
 #define IRDA_MODE_SECONDARY 0x2
+#define IRDA_MODE_MONITOR   0x4
 
 #endif /* KERNEL_IRDA_H */
 
-- 
cgit v1.2.3


From 7bfe24611671ec76b44281e582b38535e21f01a9 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki@netfilter.org>
Date: Sat, 7 Jul 2007 22:14:23 -0700
Subject: [NETFILTER]: ip6_tables: fix explanation of valid upper protocol
 number

This explains the allowed upper protocol numbers. IP6T_F_NOPROTO was
introduced to use 0 as Hop-by-Hop option header, not wildcard. But that
seemed to be forgotten. 0 has been used as wildcard since 2002-08-23.

Signed-off-by: Yasuyuki Kozakai <yasuyuki@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 4686f8342cbd..9a720f05888f 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -44,8 +44,14 @@ struct ip6t_ip6 {
 	char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
 	unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
 
-	/* ARGH, HopByHop uses 0, so can't do 0 = ANY,
-	   instead IP6T_F_NOPROTO must be set */
+	/* Upper protocol number
+	 * - The allowed value is 0 (any) or protocol number of last parsable
+	 *   header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or
+	 *   the non IPv6 extension headers.
+	 * - The protocol numbers of IPv6 extension headers except of ESP and
+	 *   MH do not match any packets.
+	 * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol.
+	 */
 	u_int16_t proto;
 	/* TOS to match iff flags & IP6T_F_TOS */
 	u_int8_t tos;
-- 
cgit v1.2.3


From cff533ac12494fa002e2c46acc94d670e5f636a2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:15:12 -0700
Subject: [NETFILTER]: x_tables: switch hotdrop to bool

Switch the "hotdrop" variables to boolean

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7e733a6ba4f6..b8577d18d10d 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -148,7 +148,7 @@ struct xt_match
 		     const void *matchinfo,
 		     int offset,
 		     unsigned int protoff,
-		     int *hotdrop);
+		     bool *hotdrop);
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
-- 
cgit v1.2.3


From 1d93a9cbad608f6398ba6c5b588c504ccd35a2ca Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:15:35 -0700
Subject: [NETFILTER]: x_tables: switch xt_match->match to bool

Switch the return type of match functions to boolean

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b8577d18d10d..304fce356a43 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -141,14 +141,14 @@ struct xt_match
 	/* Arguments changed since 2.6.9, as this must now handle
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
-	int (*match)(const struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     const struct xt_match *match,
-		     const void *matchinfo,
-		     int offset,
-		     unsigned int protoff,
-		     bool *hotdrop);
+	bool (*match)(const struct sk_buff *skb,
+		      const struct net_device *in,
+		      const struct net_device *out,
+		      const struct xt_match *match,
+		      const void *matchinfo,
+		      int offset,
+		      unsigned int protoff,
+		      bool *hotdrop);
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
-- 
cgit v1.2.3


From ccb79bdce71f2c04cfa9bfcbaf4d37e2f963d684 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:16:00 -0700
Subject: [NETFILTER]: x_tables: switch xt_match->checkentry to bool

Switch the return type of match checkentry functions to boolean.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 304fce356a43..5130dd60a2fc 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -152,11 +152,11 @@ struct xt_match
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
-	int (*checkentry)(const char *tablename,
-			  const void *ip,
-			  const struct xt_match *match,
-			  void *matchinfo,
-			  unsigned int hook_mask);
+	bool (*checkentry)(const char *tablename,
+			   const void *ip,
+			   const struct xt_match *match,
+			   void *matchinfo,
+			   unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_match *match, void *matchinfo);
-- 
cgit v1.2.3


From e1931b784a8de324abf310fa3b5e3f25d3988233 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:16:26 -0700
Subject: [NETFILTER]: x_tables: switch xt_target->checkentry to bool

Switch the return type of target checkentry functions to boolean.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5130dd60a2fc..64f425a855bb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -202,11 +202,11 @@ struct xt_target
            hook_mask is a bitmask of hooks from which it can be
            called. */
 	/* Should return true or false. */
-	int (*checkentry)(const char *tablename,
-			  const void *entry,
-			  const struct xt_target *target,
-			  void *targinfo,
-			  unsigned int hook_mask);
+	bool (*checkentry)(const char *tablename,
+			   const void *entry,
+			   const struct xt_target *target,
+			   void *targinfo,
+			   unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_target *target, void *targinfo);
-- 
cgit v1.2.3


From 1b50b8a371e90a5e110f466e4ac02cf6b5f681de Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 7 Jul 2007 22:20:36 -0700
Subject: [NETFILTER]: Add u32 match

Along comes... xt_u32, a revamped ipt_u32 from POM-NG,
Plus:

    *	2007-06-02: added ipv6 support

    *	2007-06-05: uses kmalloc for the big buffer

    *   2007-06-05: added inversion

    *   2007-06-20: use skb_copy_bits() and get rid of the big buffer
        and lock (suggested by Pablo Neira Ayuso)

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_u32.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/linux/netfilter/xt_u32.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h
new file mode 100644
index 000000000000..9947f56cdbdd
--- /dev/null
+++ b/include/linux/netfilter/xt_u32.h
@@ -0,0 +1,40 @@
+#ifndef _XT_U32_H
+#define _XT_U32_H 1
+
+enum xt_u32_ops {
+	XT_U32_AND,
+	XT_U32_LEFTSH,
+	XT_U32_RIGHTSH,
+	XT_U32_AT,
+};
+
+struct xt_u32_location_element {
+	u_int32_t number;
+	u_int8_t nextop;
+};
+
+struct xt_u32_value_element {
+	u_int32_t min;
+	u_int32_t max;
+};
+
+/*
+ * Any way to allow for an arbitrary number of elements?
+ * For now, I settle with a limit of 10 each.
+ */
+#define XT_U32_MAXSIZE 10
+
+struct xt_u32_test {
+	struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
+	struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
+	u_int8_t nnums;
+	u_int8_t nvalues;
+};
+
+struct xt_u32 {
+	struct xt_u32_test tests[XT_U32_MAXSIZE+1];
+	u_int8_t ntests;
+	u_int8_t invert;
+};
+
+#endif /* _XT_U32_H */
-- 
cgit v1.2.3


From ba9dda3ab5a865542e69dfe01edb2436857c9420 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Jul 2007 22:21:23 -0700
Subject: [NETFILTER]: x_tables: add TRACE target

The TRACE target can be used to follow IP and IPv6 packets through
the ruleset.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2d6a14f5f2f1..625d73b07ab7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -227,6 +227,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@mark: Generic packet mark
  *	@nfct: Associated connection, if any
  *	@ipvs_property: skbuff is owned by ipvs
+ *	@nf_trace: netfilter packet trace flag
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
@@ -278,7 +279,8 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-				ipvs_property:1;
+				ipvs_property:1,
+				nf_trace:1;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
-- 
cgit v1.2.3


From d3c3f4243e135b3d8c41d98be0cb2f54a4141abf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:38:30 -0700
Subject: [NETFILTER]: ipt_CLUSTERIP: add compat code

Adjust structure size and don't expect pointers passed in from
userspace to be valid. Also replace an enum in an ABI structure
by a fixed size type.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index d9bceedfb3dc..daf50be22c9d 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -18,13 +18,13 @@ struct clusterip_config;
 struct ipt_clusterip_tgt_info {
 
 	u_int32_t flags;
-	
+
 	/* only relevant for new ones */
 	u_int8_t clustermac[6];
 	u_int16_t num_total_nodes;
 	u_int16_t num_local_nodes;
 	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
-	enum clusterip_hashmode hash_mode;
+	u_int32_t hash_mode;
 	u_int32_t hash_initval;
 
 	struct clusterip_config *config;
-- 
cgit v1.2.3


From 0d53778e81ac7af266dac8a20cc328328c327112 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 7 Jul 2007 22:39:38 -0700
Subject: [NETFILTER]: Convert DEBUGP to pr_debug

Convert DEBUGP to pr_debug and fix lots of non-compiling debug statements.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_pptp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index 9d8144a488cd..c93061f33144 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -4,6 +4,8 @@
 
 #include <linux/netfilter/nf_conntrack_common.h>
 
+extern const char *pptp_msg_name[];
+
 /* state of the control session */
 enum pptp_ctrlsess_state {
 	PPTP_SESSION_NONE,			/* no session present */
-- 
cgit v1.2.3


From ce7663d84a87bb4e1743f62950bf7dceed723a13 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 7 Jul 2007 22:40:08 -0700
Subject: [NETFILTER]: nfnetlink_queue: don't unregister handler of other
 subsystem

The queue handlers registered by ip[6]_queue.ko at initialization should
not be unregistered in response to requests from userland programs
using nfnetlink_queue. If we allow that, there is no way to register
the handlers of built-in ip[6]_queue again.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 10b5c6275706..0eed0b7ab2df 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -275,7 +275,8 @@ struct nf_queue_handler {
 };
 extern int nf_register_queue_handler(int pf, 
                                      struct nf_queue_handler *qh);
-extern int nf_unregister_queue_handler(int pf);
+extern int nf_unregister_queue_handler(int pf,
+				       struct nf_queue_handler *qh);
 extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh);
 extern void nf_reinject(struct sk_buff *skb,
 			struct nf_info *info,
-- 
cgit v1.2.3


From c6c6e3e05c0b4349824efcdd36650e7be9d5c7c3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Jul 2007 22:41:55 -0700
Subject: [NET]: Update comments for skb checksums

Rusty (whose comments we should all study and emulate :) pointed
out that our comments for skb checksums are no longer up-to-date.
So here is a patch to

1) add the case of partial checksums on input;
2) update partial checksum case to mention csum_start/csum_offset;
3) mention the new IPv6 feature bit.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 625d73b07ab7..9391e4a4c344 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -65,13 +65,20 @@
  *	    is able to produce some skb->csum, it MUST use COMPLETE,
  *	    not UNNECESSARY.
  *
+ *	PARTIAL: identical to the case for output below.  This may occur
+ *	    on a packet received directly from another Linux OS, e.g.,
+ *	    a virtualised Linux kernel on the same host.  The packet can
+ *	    be treated in the same way as UNNECESSARY except that on
+ *	    output (i.e., forwarding) the checksum must be filled in
+ *	    by the OS or the hardware.
+ *
  * B. Checksumming on output.
  *
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
  *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
- *	from skb->transport_header to the end and to record the checksum
- *	at skb->transport_header + skb->csum.
+ *	from skb->csum_start to the end and to record the checksum
+ *	at skb->csum_start + skb->csum_offset.
  *
  *	Device must show its capabilities in dev->features, set
  *	at device setup time.
@@ -82,6 +89,7 @@
  *			  TCP/UDP over IPv4. Sigh. Vendors like this
  *			  way by an unknown reason. Though, see comment above
  *			  about CHECKSUM_UNNECESSARY. 8)
+ *	NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
  *
  *	Any questions? No questions, good. 		--ANK
  */
-- 
cgit v1.2.3


From bb4dbf9e61d0801927e7df2569bb3dd8287ea301 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 10 Jul 2007 22:55:49 -0700
Subject: [IPV6]: Do not send RH0 anymore.

Based on <draft-ietf-ipv6-deprecate-rh0-00.txt>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 213b63be3c8f..cb3118cf277c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -27,8 +27,8 @@ struct in6_ifreq {
 	int		ifr6_ifindex; 
 };
 
-#define IPV6_SRCRT_STRICT	0x01	/* this hop must be a neighbor	*/
-#define IPV6_SRCRT_TYPE_0	0	/* IPv6 type 0 Routing Header	*/
+#define IPV6_SRCRT_STRICT	0x01	/* Deprecated; will be removed */
+#define IPV6_SRCRT_TYPE_0	0	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
 
 /*
-- 
cgit v1.2.3


From 4c752098f529f41abfc985426a3eca0f2cb96676 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 May 2007 13:28:48 +0900
Subject: [IPV6]: Make IPV6_{RECV,2292}RTHDR boolean options.

Because reversing RH0 is no longer supported now that RH0 is
deprecated, let's make IPV6_{RECV,2292}RTHDR boolean options.
Booleans are more appropriate from a standards point of view.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb3118cf277c..97983dc9df13 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -299,8 +299,8 @@ struct ipv6_pinfo {
 	/* pktoption flags */
 	union {
 		struct {
-			__u16	srcrt:2,
-				osrcrt:2,
+			__u16	srcrt:1,
+				osrcrt:1,
 			        rxinfo:1,
 			        rxoinfo:1,
 				rxhlim:1,
-- 
cgit v1.2.3


From e69ff734e15eb7f61621f8764ce0a2181823a737 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 8 Jun 2007 16:26:08 +1000
Subject: [CRYPTO] cipher: Remove obsolete fields from cipher_tfm

This removes all the unused block cipher fields from cipher_tfm.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 0de7e2ace822..357e8cfedc37 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -295,28 +295,8 @@ struct blkcipher_tfm {
 };
 
 struct cipher_tfm {
-	void *cit_iv;
-	unsigned int cit_ivsize;
-	u32 cit_mode;
 	int (*cit_setkey)(struct crypto_tfm *tfm,
 	                  const u8 *key, unsigned int keylen);
-	int (*cit_encrypt)(struct crypto_tfm *tfm,
-			   struct scatterlist *dst,
-			   struct scatterlist *src,
-			   unsigned int nbytes);
-	int (*cit_encrypt_iv)(struct crypto_tfm *tfm,
-	                      struct scatterlist *dst,
-	                      struct scatterlist *src,
-	                      unsigned int nbytes, u8 *iv);
-	int (*cit_decrypt)(struct crypto_tfm *tfm,
-			   struct scatterlist *dst,
-			   struct scatterlist *src,
-			   unsigned int nbytes);
-	int (*cit_decrypt_iv)(struct crypto_tfm *tfm,
-			   struct scatterlist *dst,
-			   struct scatterlist *src,
-			   unsigned int nbytes, u8 *iv);
-	void (*cit_xor_block)(u8 *dst, const u8 *src);
 	void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 	void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
-- 
cgit v1.2.3


From d556ad4bbe75faf17b239e151a9f003322b2e851 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Tue, 15 May 2007 13:59:13 +0200
Subject: PCI: add PCI-X/PCI-Express read control interfaces

This patch introduces an interface to read and write PCI-X / PCI-Express
maximum read byte count values from PCI config space. There is a second
function that returns the maximum _designed_ read byte count, which marks the
maximum value for a device, since some drivers try to set MMRBC to the
highest allowed value and rely on such a function.

Based on patch set by Stephen Hemminger <shemminger@linux-foundation.org>

Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 086a0e5a6318..ac403d74a222 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -111,7 +111,8 @@ enum pcie_reset_state {
 
 typedef unsigned short __bitwise pci_bus_flags_t;
 enum pci_bus_flags {
-	PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
+	PCI_BUS_FLAGS_NO_MSI   = (__force pci_bus_flags_t) 1,
+	PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
 };
 
 struct pci_cap_saved_state {
@@ -549,6 +550,10 @@ void pci_intx(struct pci_dev *dev, int enable);
 void pci_msi_off(struct pci_dev *dev);
 int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
+int pcix_get_max_mmrbc(struct pci_dev *dev);
+int pcix_get_mmrbc(struct pci_dev *dev);
+int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
+int pcie_set_readrq(struct pci_dev *dev, int rq);
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i);
-- 
cgit v1.2.3


From 575e3348cb80c3265278756778d5091d5ca4efbf Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 8 May 2007 12:03:07 +1000
Subject: PCI: Use a weak symbol for the empty version of
 pcibios_add_platform_entries()

I'm not sure if this is going to fly, weak symbols work on the compilers I'm
using, but whether they work for all of the affected architectures I can't say.
I've cc'ed as many arch maintainers/lists as I could find.

But assuming they do, we can use a weak empty definition of
pcibios_add_platform_entries() to avoid having an empty definition on every
arch.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index ac403d74a222..18319aba1a57 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -881,5 +881,7 @@ extern int pci_pci_problems;
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
 
+extern void pcibios_add_platform_entries(struct pci_dev *dev);
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From a2cd52ca904f5913651e71764755e712894ccc2f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 8 May 2007 12:03:08 +1000
Subject: PCI: Make pcibios_add_platform_entries() return errors

Currently pcibios_add_platform_entries() returns void, but could fail,
so instead have it return an int and propagate errors up to
pci_create_sysfs_dev_files().

Fixes:
arch/powerpc/kernel/pci_64.c: In function 'pcibios_add_platform_entries':
arch/powerpc/kernel/pci_64.c:878: warning: ignoring return value of
	'device_create_file', declared with attribute warn_unused_result
arch/powerpc/kernel/pci_32.c: In function 'pcibios_add_platform_entries':
  arch/powerpc/kernel/pci_32.c:1043: warning: ignoring return value of
	'device_create_file', declared with attribute warn_unused_result

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18319aba1a57..483db814770e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -881,7 +881,7 @@ extern int pci_pci_problems;
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
 
-extern void pcibios_add_platform_entries(struct pci_dev *dev);
+extern int pcibios_add_platform_entries(struct pci_dev *dev);
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From adf809d01043d8808e47db2d35fc07b53062884e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 21 May 2007 14:16:17 -0700
Subject: + pci_find_slot-mark-deprecated.patch added to -mm tree

We've now fixed up most users of pci_find_slot, and the remainder are either
hard and need someone with the hardware and info to work on it, or patches
exist but are not yet merged.

Time therefore for some gentle encouragement

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 483db814770e..4db7b5a18f58 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -476,7 +476,7 @@ extern void pci_sort_breadthfirst(void);
 /* Generic PCI functions exported to card drivers */
 
 struct pci_dev __deprecated *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
-struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
+struct pci_dev __deprecated *pci_find_slot (unsigned int bus, unsigned int devfn);
 int pci_find_capability (struct pci_dev *dev, int cap);
 int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap);
 int pci_find_ext_capability (struct pci_dev *dev, int cap);
-- 
cgit v1.2.3


From 65b3bc358a3195ebe459761a248cf33a61539947 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 6 Jun 2007 11:46:49 +0800
Subject: PCI aer: fix stub return values

The stubs used when advanced error reporting is not enabled
must have same return type as real functions.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Acked-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/aer.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/aer.h b/include/linux/aer.h
index 402e178b38eb..64aacaed8d6c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -14,10 +14,10 @@ extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
-#define pci_enable_pcie_error_reporting(dev)		do { } while (0)
-#define pci_find_aer_capability(dev)			do { } while (0)
-#define pci_disable_pcie_error_reporting(dev)		do { } while (0)
-#define pci_cleanup_aer_uncorrect_error_status(dev)	do { } while (0)
+#define pci_enable_pcie_error_reporting(dev)		(-EINVAL)
+#define pci_find_aer_capability(dev)			(0)
+#define pci_disable_pcie_error_reporting(dev)		(-EINVAL)
+#define pci_cleanup_aer_uncorrect_error_status(dev)	(-EINVAL)
 #endif
 
 #endif //_AER_H_
-- 
cgit v1.2.3


From f0dce411930d16a678173e534594bca160f5eaff Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 6 Jun 2007 11:50:34 +0800
Subject: PCI aer: add pci_cleanup_aer_correct_aer_status

Function to clear bogus correctable errors. Analogous to
pci_cleanup_aer_uncorrect_error_status.
The Marvell chips seem to start out with a bogus value that needs to be
cleared.

Yanmin ported it to 2.6.22-rc4 by fixing a fuzz patch applying info.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Acked-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/aer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/aer.h b/include/linux/aer.h
index 64aacaed8d6c..509656286e53 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -13,11 +13,13 @@ extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+extern int pci_cleanup_aer_correct_error_status(struct pci_dev *dev);
 #else
 #define pci_enable_pcie_error_reporting(dev)		(-EINVAL)
 #define pci_find_aer_capability(dev)			(0)
 #define pci_disable_pcie_error_reporting(dev)		(-EINVAL)
 #define pci_cleanup_aer_uncorrect_error_status(dev)	(-EINVAL)
+#define pci_cleanup_aer_correct_error_status(dev)	(-EINVAL)
 #endif
 
 #endif //_AER_H_
-- 
cgit v1.2.3


From 56906c612e10b5e32a48ccbe8a3c08ab6acf5a28 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Mon, 7 May 2007 10:26:17 -0700
Subject: PCI: remove useless pci driver method

Remove pointless and never-called enable_wake() hook from pci_driver and
from documentation.  Evidently this was introduced in the 2.4.6 kernel,
but there's no evidence it was ever called; and it was rarely implemented.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4db7b5a18f58..5be420ac6303 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -371,7 +371,6 @@ struct pci_driver {
 	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
-	int  (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable);   /* Enable wake event */
 	void (*shutdown) (struct pci_dev *dev);
 
 	struct pci_error_handlers *err_handler;
-- 
cgit v1.2.3


From b8a3a5214d7cc115f1ca3a3967b7229d97c46f4a Mon Sep 17 00:00:00 2001
From: Auke Kok <auke-jan.h.kok@intel.com>
Date: Fri, 8 Jun 2007 15:46:30 -0700
Subject: PCI: read revision ID by default

Currently there are 97 occurrences where drivers need the pci
revision ID. We can do this once for all devices. Even the pci
subsystem needs the revision several times for quirks. The extra
u8 member pads out nicely in the pci_dev struct.

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5be420ac6303..45332440a2e6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -139,6 +139,7 @@ struct pci_dev {
 	unsigned short	subsystem_vendor;
 	unsigned short	subsystem_device;
 	unsigned int	class;		/* 3 bytes: (base,sub,prog-if) */
+	u8		revision;	/* PCI revision, low byte of class word */
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;  		/* which interrupt pin this device uses */
-- 
cgit v1.2.3


From 1d0ed384c1f2582b6f7408642c77a78a0c410122 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 18 Jun 2007 10:58:13 +0200
Subject: PCI: ATM: lanai, change VENDOR to DEVICE

lanai, change VENDOR to DEVICE

There were 2 badly named macros in pci_ids (LANAI2 and LANAIHB). Rename them
to DEVICE, because they are device ids. Also do some cleanup in the
pci_device_id table (use PCI_VDEVICE).

Cc: Mitchell Blank Jr <mitch@sfgoth.com>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 75c4d4d06892..a260a947c917 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1383,8 +1383,8 @@
 #define PCI_VENDOR_ID_EF		0x111a
 #define PCI_DEVICE_ID_EF_ATM_FPGA	0x0000
 #define PCI_DEVICE_ID_EF_ATM_ASIC	0x0002
-#define PCI_VENDOR_ID_EF_ATM_LANAI2	0x0003
-#define PCI_VENDOR_ID_EF_ATM_LANAIHB	0x0005
+#define PCI_DEVICE_ID_EF_ATM_LANAI2	0x0003
+#define PCI_DEVICE_ID_EF_ATM_LANAIHB	0x0005
 
 #define PCI_VENDOR_ID_IDT		0x111d
 #define PCI_DEVICE_ID_IDT_IDT77201	0x0001
-- 
cgit v1.2.3


From c43eaa02abf3b034a9694dcca5c177ecb6072f89 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 18 Jun 2007 10:58:14 +0200
Subject: PCI: i386: traps, change VENDOR to DEVICE

traps, change VENDOR to DEVICE

Change macro for SGI lithium (arch/i386/mach-visws/traps.c) device from
VENDOR to DEVICE, because it's a device id.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a260a947c917..228e0befeda1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -944,8 +944,8 @@
 
 #define PCI_VENDOR_ID_SGI		0x10a9
 #define PCI_DEVICE_ID_SGI_IOC3		0x0003
+#define PCI_DEVICE_ID_SGI_LITHIUM	0x1002
 #define PCI_DEVICE_ID_SGI_IOC4		0x100a
-#define PCI_VENDOR_ID_SGI_LITHIUM	0x1002
 
 
 #define PCI_VENDOR_ID_WINBOND		0x10ad
-- 
cgit v1.2.3


From 03966e097db1a3b7aff5d364277f2e66069923df Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 18 Jun 2007 10:55:30 +0200
Subject: PCI: pci_ids, reorder some entries

pci_ids, reorder some entries

Some lines are not sorted by vendor; reorder them to comply with the rest of
the document.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 228e0befeda1..0058fb920c74 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -133,6 +133,9 @@
 
 /* Vendors and devices.  Sort key: vendor first, device next. */
 
+#define PCI_VENDOR_ID_TTTECH		0x0357
+#define PCI_DEVICE_ID_TTTECH_MC322	0x000a
+
 #define PCI_VENDOR_ID_DYNALINK		0x0675
 #define PCI_DEVICE_ID_DYNALINK_IS64PH	0x1702
 
@@ -1902,6 +1905,8 @@
 #define PCI_DEVICE_ID_OXSEMI_16PCI952	0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP	0x9523
 
+#define PCI_VENDOR_ID_CHELSIO		0x1425
+
 #define PCI_VENDOR_ID_SAMSUNG		0x144d
 
 #define PCI_VENDOR_ID_MYRICOM		0x14c1
@@ -2010,8 +2015,6 @@
 #define PCI_DEVICE_ID_ENE_720		0x1421
 #define PCI_DEVICE_ID_ENE_722		0x1422
 
-#define PCI_VENDOR_ID_CHELSIO		0x1425
-
 #define PCI_SUBVENDOR_ID_PERLE          0x155f
 #define PCI_SUBDEVICE_ID_PCI_RAS4       0xf001
 #define PCI_SUBDEVICE_ID_PCI_RAS8       0xf010
@@ -2035,6 +2038,9 @@
 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
 #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
 
+#define PCI_VENDOR_ID_QUICKNET		0x15e2
+#define PCI_DEVICE_ID_QUICKNET_XJ	0x0500
+
 #define PCI_VENDOR_ID_PDC		0x15e9
 
 
@@ -2412,13 +2418,7 @@
 #define PCI_DEVICE_ID_TIGERJET_300	0x0001
 #define PCI_DEVICE_ID_TIGERJET_100	0x0002
 
-#define PCI_VENDOR_ID_TTTECH		0x0357
-#define PCI_DEVICE_ID_TTTECH_MC322	0x000A
-
 #define PCI_VENDOR_ID_XILINX_RME	0xea60
 #define PCI_DEVICE_ID_RME_DIGI32	0x9896
 #define PCI_DEVICE_ID_RME_DIGI32_PRO	0x9897
 #define PCI_DEVICE_ID_RME_DIGI32_8	0x9898
-
-#define PCI_VENDOR_ID_QUICKNET		0x15E2
-#define PCI_DEVICE_ID_QUICKNET_XJ	0x0500
-- 
cgit v1.2.3


From f732ee0b71365ddc20e6a0b408f9fd1732d7eb75 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 18 Jun 2007 10:58:14 +0200
Subject: PCI: pci_ids, add atheros and 3com_2 vendors

pci_ids, add atheros and 3com_2 vendors

Atheros is a wifi vendor. 3com_2 (0xa727) is a vendor id for one card with
an ath chip.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0058fb920c74..0995e97b1ccf 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2059,6 +2059,8 @@
 #define PCI_DEVICE_ID_BCM1250_PCI	0x0001
 #define PCI_DEVICE_ID_BCM1250_HT	0x0002
 
+#define PCI_VENDOR_ID_ATHEROS		0x168c
+
 #define PCI_VENDOR_ID_NETCELL		0x169c
 #define PCI_DEVICE_ID_REVOLUTION	0x0044
 
@@ -2410,6 +2412,8 @@
 #define PCI_DEVICE_ID_NETMOS_9845	0x9845
 #define PCI_DEVICE_ID_NETMOS_9855	0x9855
 
+#define PCI_VENDOR_ID_3COM_2		0xa727
+
 #define PCI_SUBVENDOR_ID_EXSYS		0xd84d
 #define PCI_SUBDEVICE_ID_EXSYS_4014	0x4014
 #define PCI_SUBDEVICE_ID_EXSYS_4055	0x4055
-- 
cgit v1.2.3


From 12bedda9f404c4d34eda6477b0ec32140d83501b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 18 Jun 2007 10:56:52 +0200
Subject: PCI: pci_ids, remove double or more empty lines

pci_ids, remove two or more empty lines

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 48 ------------------------------------------------
 1 file changed, 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0995e97b1ccf..93961e9d0308 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -735,7 +735,6 @@
 #define PCI_DEVICE_ID_ELSA_MICROLINK	0x1000
 #define PCI_DEVICE_ID_ELSA_QS3000	0x3000
 
-
 #define PCI_VENDOR_ID_BUSLOGIC		      0x104B
 #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
 #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER    0x1040
@@ -781,7 +780,6 @@
 
 #define PCI_VENDOR_ID_SONY		0x104d
 
-
 /* Winbond have two vendor IDs! See 0x10ad as well */
 #define PCI_VENDOR_ID_WINBOND2		0x1050
 #define PCI_DEVICE_ID_WINBOND2_89C940F	0x5a5a
@@ -819,7 +817,6 @@
 #define PCI_DEVICE_ID_PROMISE_20276	0x5275
 #define PCI_DEVICE_ID_PROMISE_20277	0x7275
 
-
 #define PCI_VENDOR_ID_UMC		0x1060
 #define PCI_DEVICE_ID_UMC_UM8673F	0x0101
 #define PCI_DEVICE_ID_UMC_UM8886BF	0x673a
@@ -835,7 +832,6 @@
 #define PCI_DEVICE_ID_MYLEX_DAC960_BA	0xBA56
 #define PCI_DEVICE_ID_MYLEX_DAC960_GEM	0xB166
 
-
 #define PCI_VENDOR_ID_APPLE		0x106b
 #define PCI_DEVICE_ID_APPLE_BANDIT	0x0001
 #define PCI_DEVICE_ID_APPLE_HYDRA	0x000e
@@ -871,7 +867,6 @@
 #define PCI_DEVICE_ID_YAMAHA_744	0x0010
 #define PCI_DEVICE_ID_YAMAHA_754	0x0012
 
-
 #define PCI_VENDOR_ID_QLOGIC		0x1077
 #define PCI_DEVICE_ID_QLOGIC_ISP10160	0x1016
 #define PCI_DEVICE_ID_QLOGIC_ISP1020	0x1020
@@ -902,12 +897,9 @@
 #define PCI_DEVICE_ID_CYRIX_5530_AUDIO	0x0103
 #define PCI_DEVICE_ID_CYRIX_5530_VIDEO	0x0104
 
-
-
 #define PCI_VENDOR_ID_CONTAQ		0x1080
 #define PCI_DEVICE_ID_CONTAQ_82C693	0xc693
 
-
 #define PCI_VENDOR_ID_OLICOM		0x108d
 #define PCI_DEVICE_ID_OLICOM_OC2325	0x0012
 #define PCI_DEVICE_ID_OLICOM_OC2183	0x0013
@@ -939,23 +931,19 @@
 #define PCI_DEVICE_ID_SII_3112		0x3112
 #define PCI_DEVICE_ID_SII_1210SA	0x0240
 
-
 #define PCI_VENDOR_ID_BROOKTREE		0x109e
 #define PCI_DEVICE_ID_BROOKTREE_878	0x0878
 #define PCI_DEVICE_ID_BROOKTREE_879	0x0879
 
-
 #define PCI_VENDOR_ID_SGI		0x10a9
 #define PCI_DEVICE_ID_SGI_IOC3		0x0003
 #define PCI_DEVICE_ID_SGI_LITHIUM	0x1002
 #define PCI_DEVICE_ID_SGI_IOC4		0x100a
 
-
 #define PCI_VENDOR_ID_WINBOND		0x10ad
 #define PCI_DEVICE_ID_WINBOND_82C105	0x0105
 #define PCI_DEVICE_ID_WINBOND_83C553	0x0565
 
-
 #define PCI_VENDOR_ID_PLX		0x10b5
 #define PCI_DEVICE_ID_PLX_R685		0x1030
 #define PCI_DEVICE_ID_PLX_ROMULUS	0x106a
@@ -989,7 +977,6 @@
 #define PCI_DEVICE_ID_3COM_3CR990SVR97	0x9909
 #define PCI_DEVICE_ID_3COM_3CR990SVR	0x990a
 
-
 #define PCI_VENDOR_ID_AL		0x10b9
 #define PCI_DEVICE_ID_AL_M1533		0x1533
 #define PCI_DEVICE_ID_AL_M1535 		0x1535
@@ -1012,18 +999,14 @@
 #define PCI_DEVICE_ID_AL_M5451		0x5451
 #define PCI_DEVICE_ID_AL_M7101		0x7101
 
-
-
 #define PCI_VENDOR_ID_NEOMAGIC		0x10c8
 #define PCI_DEVICE_ID_NEOMAGIC_NM256AV_AUDIO 0x8005
 #define PCI_DEVICE_ID_NEOMAGIC_NM256ZX_AUDIO 0x8006
 #define PCI_DEVICE_ID_NEOMAGIC_NM256XL_PLUS_AUDIO 0x8016
 
-
 #define PCI_VENDOR_ID_TCONRAD		0x10da
 #define PCI_DEVICE_ID_TCONRAD_TOKENRING	0x0508
 
-
 #define PCI_VENDOR_ID_NVIDIA			0x10de
 #define PCI_DEVICE_ID_NVIDIA_TNT		0x0020
 #define PCI_DEVICE_ID_NVIDIA_TNT2		0x0028
@@ -1244,9 +1227,6 @@
 #define PCI_DEVICE_ID_IMS_TT128		0x9128
 #define PCI_DEVICE_ID_IMS_TT3D		0x9135
 
-
-
-
 #define PCI_VENDOR_ID_INTERG		0x10ea
 #define PCI_DEVICE_ID_INTERG_1682	0x1682
 #define PCI_DEVICE_ID_INTERG_2000	0x2000
@@ -1265,7 +1245,6 @@
 #define PCI_DEVICE_ID_XILINX_HAMMERFALL_DSP 0x3fc5
 #define PCI_DEVICE_ID_XILINX_HAMMERFALL_DSP_MADI 0x3fc6
 
-
 #define PCI_VENDOR_ID_INIT		0x1101
 
 #define PCI_VENDOR_ID_CREATIVE		0x1102 /* duplicate: ECTIVA */
@@ -1360,7 +1339,6 @@
 #define PCI_VENDOR_ID_SIEMENS           0x110A
 #define PCI_DEVICE_ID_SIEMENS_DSCC4     0x2102
 
-
 #define PCI_VENDOR_ID_VORTEX		0x1119
 #define PCI_DEVICE_ID_VORTEX_GDT60x0	0x0000
 #define PCI_DEVICE_ID_VORTEX_GDT6000B	0x0001
@@ -1395,7 +1373,6 @@
 #define PCI_VENDOR_ID_FORE		0x1127
 #define PCI_DEVICE_ID_FORE_PCA200E	0x0300
 
-
 #define PCI_VENDOR_ID_PHILIPS		0x1131
 #define PCI_DEVICE_ID_PHILIPS_SAA7146	0x7146
 #define PCI_DEVICE_ID_PHILIPS_SAA9730	0x9730
@@ -1414,7 +1391,6 @@
 #define PCI_DEVICE_ID_ZIATECH_5550_HC	0x5550
  
 
-
 #define PCI_VENDOR_ID_SYSKONNECT	0x1148
 #define PCI_DEVICE_ID_SYSKONNECT_TR	0x4200
 #define PCI_DEVICE_ID_SYSKONNECT_GE	0x4300
@@ -1422,7 +1398,6 @@
 #define PCI_DEVICE_ID_SYSKONNECT_9DXX	0x4400
 #define PCI_DEVICE_ID_SYSKONNECT_9MXX	0x4500
 
-
 #define PCI_VENDOR_ID_DIGI		0x114f
 #define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E	0x0070
 #define PCI_DEVICE_ID_DIGI_DF_M_E	0x0071
@@ -1433,12 +1408,10 @@
 #define PCI_DEVICE_ID_NEO_2RJ45         0x00CA
 #define PCI_DEVICE_ID_NEO_2RJ45PRI      0x00CB
 
-
 #define PCI_VENDOR_ID_XIRCOM		0x115d
 #define PCI_DEVICE_ID_XIRCOM_RBM56G	0x0101
 #define PCI_DEVICE_ID_XIRCOM_X3201_MDM	0x0103
 
-
 #define PCI_VENDOR_ID_SERVERWORKS	  0x1166
 #define PCI_DEVICE_ID_SERVERWORKS_HE	  0x0008
 #define PCI_DEVICE_ID_SERVERWORKS_LE	  0x0009
@@ -1507,7 +1480,6 @@
 #define PCI_DEVICE_ID_ZEITNET_1221	0x0001
 #define PCI_DEVICE_ID_ZEITNET_1225	0x0002
 
-
 #define PCI_VENDOR_ID_FUJITSU_ME	0x119e
 #define PCI_DEVICE_ID_FUJITSU_FS155	0x0001
 #define PCI_DEVICE_ID_FUJITSU_FS50	0x0003
@@ -1525,28 +1497,23 @@
 #define PCI_DEVICE_ID_V3_V960		0x0001
 #define PCI_DEVICE_ID_V3_V351		0x0002
 
-
 #define PCI_VENDOR_ID_ATT		0x11c1
 #define PCI_DEVICE_ID_ATT_VENUS_MODEM	0x480
 
-
 #define PCI_VENDOR_ID_SPECIALIX		0x11cb
 #define PCI_DEVICE_ID_SPECIALIX_IO8	0x2000
 #define PCI_DEVICE_ID_SPECIALIX_RIO	0x8000
 #define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
 
-
 #define PCI_VENDOR_ID_ANALOG_DEVICES	0x11d4
 #define PCI_DEVICE_ID_AD1889JS		0x1889
 
-
 #define PCI_DEVICE_ID_SEGA_BBA		0x1234
 
 #define PCI_VENDOR_ID_ZORAN		0x11de
 #define PCI_DEVICE_ID_ZORAN_36057	0x6057
 #define PCI_DEVICE_ID_ZORAN_36120	0x6120
 
-
 #define PCI_VENDOR_ID_COMPEX		0x11f6
 #define PCI_DEVICE_ID_COMPEX_ENET100VG4	0x0112
 
@@ -1605,8 +1572,6 @@
 #define PCI_DEVICE_ID_3DFX_VOODOO3	0x0005
 #define PCI_DEVICE_ID_3DFX_VOODOO5	0x0009
 
-
-
 #define PCI_VENDOR_ID_AVM		0x1244
 #define PCI_DEVICE_ID_AVM_B1		0x0700
 #define PCI_DEVICE_ID_AVM_C4		0x0800
@@ -1615,7 +1580,6 @@
 #define PCI_DEVICE_ID_AVM_C2		0x1100
 #define PCI_DEVICE_ID_AVM_T1		0x1200
 
-
 #define PCI_VENDOR_ID_STALLION		0x124d
 
 /* Allied Telesyn */
@@ -1638,7 +1602,6 @@
 #define PCI_VENDOR_ID_SATSAGEM		0x1267
 #define PCI_DEVICE_ID_SATSAGEM_NICCY	0x1016
 
-
 #define PCI_VENDOR_ID_ENSONIQ		0x1274
 #define PCI_DEVICE_ID_ENSONIQ_CT5880	0x5880
 #define PCI_DEVICE_ID_ENSONIQ_ES1370	0x5000
@@ -1661,7 +1624,6 @@
 
 #define PCI_VENDOR_ID_ALTEON		0x12ae
 
-
 #define PCI_SUBVENDOR_ID_CONNECT_TECH			0x12c4
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232		0x0001
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232		0x0002
@@ -1692,7 +1654,6 @@
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_485	0x0331
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485	0x0332
 
-
 #define PCI_VENDOR_ID_NVIDIA_SGS	0x12d2
 #define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018
 
@@ -1802,7 +1763,6 @@
 #define PCI_DEVICE_ID_LMC_SSI		0x0005
 #define PCI_DEVICE_ID_LMC_T1		0x0006
 
-
 #define PCI_VENDOR_ID_NETGEAR		0x1385
 #define PCI_DEVICE_ID_NETGEAR_GA620	0x620a
 
@@ -2019,7 +1979,6 @@
 #define PCI_SUBDEVICE_ID_PCI_RAS4       0xf001
 #define PCI_SUBDEVICE_ID_PCI_RAS8       0xf010
 
-
 #define PCI_VENDOR_ID_SYBA		0x1592
 #define PCI_DEVICE_ID_SYBA_2P_EPP	0x0782
 #define PCI_DEVICE_ID_SYBA_1P_ECP	0x0783
@@ -2043,7 +2002,6 @@
 
 #define PCI_VENDOR_ID_PDC		0x15e9
 
-
 #define PCI_VENDOR_ID_FARSITE           0x1619
 #define PCI_DEVICE_ID_FARSITE_T2P       0x0400
 #define PCI_DEVICE_ID_FARSITE_T4P       0x0440
@@ -2099,7 +2057,6 @@
 #define PCI_DEVICE_ID_HERC_WIN		0x5732
 #define PCI_DEVICE_ID_HERC_UNI		0x5832
 
-
 #define PCI_VENDOR_ID_SITECOM		0x182d
 #define PCI_DEVICE_ID_SITECOM_DC105V2	0x3069
 
@@ -2135,12 +2092,9 @@
 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2	0x0007
 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2V	0x0009
 
-
 #define PCI_VENDOR_ID_AKS		0x416c
 #define PCI_DEVICE_ID_AKS_ALADDINCARD	0x0100
 
-
-
 #define PCI_VENDOR_ID_S3		0x5333
 #define PCI_DEVICE_ID_S3_TRIO		0x8811
 #define PCI_DEVICE_ID_S3_868		0x8880
@@ -2152,7 +2106,6 @@
 #define PCI_VENDOR_ID_DUNORD		0x5544
 #define PCI_DEVICE_ID_DUNORD_I3000	0x0001
 
-
 #define PCI_VENDOR_ID_DCI		0x6666
 #define PCI_DEVICE_ID_DCI_PCCOM4	0x0001
 #define PCI_DEVICE_ID_DCI_PCCOM8	0x0002
@@ -2396,7 +2349,6 @@
 #define PCI_DEVICE_ID_ADAPTEC2_OBSIDIAN   0x0500
 #define PCI_DEVICE_ID_ADAPTEC2_SCAMP	0x0503
 
-
 #define PCI_VENDOR_ID_HOLTEK		0x9412
 #define PCI_DEVICE_ID_HOLTEK_6565	0x6565
 
-- 
cgit v1.2.3


From 579082df38839efc5b14aa3f48b8806e3e8dc5c2 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Tue, 10 Jul 2007 13:35:05 +0200
Subject: PCI: Fix typo in include/linux/pci.h

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 45332440a2e6..a6657b7f245d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -315,7 +315,7 @@ struct pci_dynids {
 
 /* ---------------------------------------------------------------- */
 /** PCI Error Recovery System (PCI-ERS).  If a PCI device driver provides
- *  a set fof callbacks in struct pci_error_handlers, then that device driver
+ *  a set of callbacks in struct pci_error_handlers, then that device driver
  *  will be notified of PCI bus errors, and will be driven to recovery
  *  when an error occurs.
  */
-- 
cgit v1.2.3


From 694625c0b322905d6892fad873029f764cd4823f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 9 Jul 2007 11:55:54 -0700
Subject: PCI: add pci_try_set_mwi

As suggested by Andrew, add pci_try_set_mwi(), which does not require
return-value checking.

- add pci_try_set_mwi() without __must_check
- make it return 0 on success, errno if the "try" failed or error
- review callers

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index a6657b7f245d..a5602e26f4dd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -545,6 +545,7 @@ void pci_set_master(struct pci_dev *dev);
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
 #define HAVE_PCI_SET_MWI
 int __must_check pci_set_mwi(struct pci_dev *dev);
+int pci_try_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
 void pci_msi_off(struct pci_dev *dev);
-- 
cgit v1.2.3


From cfc94cdf8e0f14e692a5a40ef3cc10f464b2511b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 9 May 2007 13:19:52 +0200
Subject: debugfs: add rename for debugfs files

Implement debugfs_rename() to allow renaming files/directories in debugfs.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/debugfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 5a9c49534d08..104e51e20e14 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -38,6 +38,9 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 
 void debugfs_remove(struct dentry *dentry);
 
+struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+                struct dentry *new_dir, const char *new_name);
+
 struct dentry *debugfs_create_u8(const char *name, mode_t mode,
 				 struct dentry *parent, u8 *value);
 struct dentry *debugfs_create_u16(const char *name, mode_t mode,
@@ -85,6 +88,12 @@ static inline struct dentry *debugfs_create_symlink(const char *name,
 static inline void debugfs_remove(struct dentry *dentry)
 { }
 
+static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+                struct dentry *new_dir, char *new_name)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline struct dentry *debugfs_create_u8(const char *name, mode_t mode,
 					       struct dentry *parent,
 					       u8 *value)
-- 
cgit v1.2.3


From 4f5c791a850e5305a5b1b48d0e4b4de248dc96f9 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <mzxreary@0pointer.de>
Date: Tue, 8 May 2007 22:07:02 +0200
Subject: DMI-based module autoloading

The patch below adds DMI/SMBIOS based module autoloading to the Linux
kernel. The idea is to load laptop drivers automatically (and other
drivers which cannot be autoloaded otherwise), based on the DMI system
identification information of the BIOS.

Right now most distros manually try to load all available laptop
drivers on bootup in the hope that at least one of them loads
successfully. This patch does away with all that, and uses udev to
automatically load matching drivers on the right machines.

Basically the patch just exports the DMI information that has been
parsed by the kernel anyway to userspace via a sysfs device
/sys/class/dmi/id and makes sure that proper modalias attributes are
available. Besides adding the "modalias" attribute it also adds
attributes for a few other DMI fields which might be useful for
writing udev rules.

This patch is not an attempt to export the entire DMI/SMBIOS data to
userspace. We already have "dmidecode" which parses the complete DMI
info from userspace. The purpose of this patch is machine model
identification and good udev integration.

To take advantage of DMI based module autoloading, a driver should
export one or more MODULE_ALIAS fields similar to these:

MODULE_ALIAS("dmi:*:svnMICRO-STARINT'LCO.,LTD:pnMS-1013:pvr0131*:cvnMICRO-STARINT'LCO.,LTD:ct10:*");
MODULE_ALIAS("dmi:*:svnMicro-StarInternational:pnMS-1058:pvr0581:rvnMSI:rnMS-1058:*:ct10:*");
MODULE_ALIAS("dmi:*:svnMicro-StarInternational:pnMS-1412:*:rvnMSI:rnMS-1412:*:cvnMICRO-STARINT'LCO.,LTD:ct10:*");
MODULE_ALIAS("dmi:*:svnNOTEBOOK:pnSAM2000:pvr0131*:cvnMICRO-STARINT'LCO.,LTD:ct10:*");

These lines are specific to my msi-laptop.c driver. They are basically
just a concatenation of a few carefully selected DMI fields with all
potentially bad characters stripped.

Besides laptop drivers, modules like "hdaps", the i2c modules
and the hwmon modules are good candidates for "dmi:" MODULE_ALIAS
lines.

Besides merely exporting the DMI data via sysfs the patch adds
support for a few more DMI fields. Especially the CHASSIS fields are
very useful to identify different laptop modules. The patch also adds
working MODULE_ALIAS lines to my msi-laptop.c driver.

I'd like to thank Kay Sievers for helping me to clean up this patch
for posting it on lkml.

Patch is against Linus' current GIT HEAD. Should probably apply to
older kernels as well without modification.


Signed-off-by: Lennart Poettering <mzxreary@0pointer.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/dmi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 904bf3d2d90b..b8ac7b01c45e 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -12,9 +12,17 @@ enum dmi_field {
 	DMI_PRODUCT_NAME,
 	DMI_PRODUCT_VERSION,
 	DMI_PRODUCT_SERIAL,
+	DMI_PRODUCT_UUID,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
+	DMI_BOARD_SERIAL,
+	DMI_BOARD_ASSET_TAG,
+	DMI_CHASSIS_VENDOR,
+	DMI_CHASSIS_TYPE,
+	DMI_CHASSIS_VERSION,
+	DMI_CHASSIS_SERIAL,
+	DMI_CHASSIS_ASSET_TAG,
 	DMI_STRING_MAX,
 };
 
-- 
cgit v1.2.3


From 9cddad77574313fcee36c5e60122718daa7c0361 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 13 Jun 2007 15:53:34 +0200
Subject: PM: Remove pm_parent from struct dev_pm_info

The pm_parent member of struct dev_pm_info (defined in include/linux/pm.h) is
only used to check if the device's parent is in the right state while the
device is being suspended or resumed.  However, this can be done just as well
with the help of the parent pointer in struct device, so pm_parent can be
removed along with some code that handles it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index b2c4fde4e994..3fd65ad4b097 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -269,13 +269,10 @@ struct dev_pm_info {
 	unsigned		should_wakeup:1;
 	pm_message_t		prev_state;
 	void			* saved_state;
-	struct device		* pm_parent;
 	struct list_head	entry;
 #endif
 };
 
-extern void device_pm_set_parent(struct device * dev, struct device * parent);
-
 extern int device_power_down(pm_message_t state);
 extern void device_power_up(void);
 extern void device_resume(void);
-- 
cgit v1.2.3


From cc4900690bf77257996e90f0059eb074b8db52e6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 13 Jun 2007 15:55:34 +0200
Subject: PM: Remove saved_state from struct dev_pm_info

The saved_state member of struct dev_pm_info, defined in include/linux/pm.h, is
not used anywhere, so it can be removed.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3fd65ad4b097..6e7f06671683 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -268,7 +268,6 @@ struct dev_pm_info {
 #ifdef	CONFIG_PM
 	unsigned		should_wakeup:1;
 	pm_message_t		prev_state;
-	void			* saved_state;
 	struct list_head	entry;
 #endif
 };
-- 
cgit v1.2.3


From 515c53576299e32d6bdb6295cfa2fe1307516eb4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 17 Jun 2007 19:48:06 +0200
Subject: PM: Remove prev_state from struct dev_pm_info

The prev_state member of struct dev_pm_info (defined in include/linux/pm.h) is
only used during a resume to check if the device's state before the suspend was
'off', in which case the device is not resumed.  However, in such cases the
decision whether or not to resume the device should be made on the driver level
and the resume callbacks from the device's bus and class should be executed
anyway (they may be needed for some things other than just powering on the
device).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6e7f06671683..273781c82e4d 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -267,7 +267,6 @@ struct dev_pm_info {
 	unsigned		can_wakeup:1;
 #ifdef	CONFIG_PM
 	unsigned		should_wakeup:1;
-	pm_message_t		prev_state;
 	struct list_head	entry;
 #endif
 };
-- 
cgit v1.2.3


From 72dba584b695d8bc8c1a50ed54ad4cba7c62314d Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 03:45:13 +0900
Subject: ida: implement idr based id allocator

Implement idr based id allocator.  ida is used the same way idr is
used but lacks id -> ptr translation and thus consumes much less
memory.  struct ida_bitmap is attached as leaf nodes to idr tree which
is managed by the idr code.  Each ida_bitmap is 128 bytes long and
contains slightly less than a thousand slots.

ida is more aggressive with releasing extra resources acquired using
ida_pre_get().  After every successful id allocation, ida frees one
reserved idr_layer if possible.  Reserved ida_bitmap is not freed
automatically but only one ida_bitmap is reserved and it's almost
always used right away.  Under most circumstances, ida won't hold on
to memory for too long which isn't actively used.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/idr.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 826803449db7..915572fa030b 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -83,4 +83,33 @@ void idr_remove(struct idr *idp, int id);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
 
+
+/*
+ * IDA - IDR based id allocator, use when translation from id to
+ * pointer isn't necessary.
+ */
+#define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
+#define IDA_BITMAP_LONGS	(128 / sizeof(long) - 1)
+#define IDA_BITMAP_BITS		(IDA_BITMAP_LONGS * sizeof(long) * 8)
+
+struct ida_bitmap {
+	long			nr_busy;
+	unsigned long		bitmap[IDA_BITMAP_LONGS];
+};
+
+struct ida {
+	struct idr		idr;
+	struct ida_bitmap	*free_bitmap;
+};
+
+#define IDA_INIT(name)		{ .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
+
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
+int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
+int ida_get_new(struct ida *ida, int *p_id);
+void ida_remove(struct ida *ida, int id);
+void ida_destroy(struct ida *ida);
+void ida_init(struct ida *ida);
+
 #endif /* __IDR_H__ */
-- 
cgit v1.2.3


From 0c096b507f15397da890051ee73de4266d3941fb Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 03:45:15 +0900
Subject: sysfs: add sysfs_dirent->s_name

Add s_name to sysfs_dirent.  This is to further reduce dependency to
the associated dentry.  Name is copied for directories and symlinks
but not for attributes.

Where possible, name dereferences are converted to use sd->s_name.
sysfs_symlink->link_name and sysfs_get_name() are unused now and
removed.

This change allows symlink to be implemented using sysfs_dirent tree
proper, which is the last remaining dentry-dependent sysfs walk.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7d5d1ec95c2e..2f86b080b39d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -76,6 +76,7 @@ struct sysfs_ops {
 #define SYSFS_KOBJ_BIN_ATTR	0x0008
 #define SYSFS_KOBJ_LINK 	0x0020
 #define SYSFS_NOT_PINNED	(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
+#define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
 
 #ifdef CONFIG_SYSFS
 
-- 
cgit v1.2.3


From 7b595756ec1f49e0049a9e01a1298d53a7faaa15 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 03:45:17 +0900
Subject: sysfs: kill unnecessary attribute->owner

sysfs is now completely out of driver/module lifetime game.  After
deletion, a sysfs node doesn't access anything outside sysfs proper,
so there's no reason to hold onto the attribute owners.  Note that
often the wrong modules were accounted for as owners leading to
accessing removed modules.

This patch kills now unnecessary attribute->owner.  Note that with
this change, userland holding a sysfs node does not prevent the
backing module from being unloaded.

For more info regarding lifetime rule cleanup, please read the
following message.

  http://article.gmane.org/gmane.linux.kernel/510293

(tweaked by Greg to not delete the field just yet, to make it easier to
merge things properly.)

Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysdev.h |  3 +--
 include/linux/sysfs.h  | 12 ++++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index e699ab279c2c..e285746588d6 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -101,8 +101,7 @@ struct sysdev_attribute {
 
 #define _SYSDEV_ATTR(_name,_mode,_show,_store)			\
 {								\
-	.attr = { .name = __stringify(_name), .mode = _mode,	\
-		 .owner = THIS_MODULE },			\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
 	.show	= _show,					\
 	.store	= _store,					\
 }
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 2f86b080b39d..161e19aa2b4f 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,9 +20,13 @@ struct module;
 struct nameidata;
 struct dentry;
 
+/* FIXME
+ * The *owner field is no longer used, but leave around
+ * until the tree gets cleaned up fully.
+ */
 struct attribute {
 	const char		* name;
-	struct module 		* owner;
+	struct module		* owner;
 	mode_t			mode;
 };
 
@@ -39,14 +43,14 @@ struct attribute_group {
  */
 
 #define __ATTR(_name,_mode,_show,_store) { \
-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE },	\
+	.attr = {.name = __stringify(_name), .mode = _mode },	\
 	.show	= _show,					\
 	.store	= _store,					\
 }
 
 #define __ATTR_RO(_name) { \
-	.attr	= { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE },	\
-	.show	= _name##_show,	\
+	.attr	= { .name = __stringify(_name), .mode = 0444 },	\
+	.show	= _name##_show,					\
 }
 
 #define __ATTR_NULL { .attr = { .name = NULL } }
-- 
cgit v1.2.3


From ad6a1e1c66009ba9dcd2f5c90ffa1fb4ce72fce0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 03:45:17 +0900
Subject: driver-core: make devt_attr and uevent_attr static

devt_attr and uevent_attr are either allocated dynamically with, or
embedded in, device and class_device, as they needed their owner field
set to the module implementing the driver.  Now that sysfs implements
immediate disconnect and the owner field has been removed from struct
attribute, there is no reason to do this.  Remove these attributes from
[class_]device and use static attribute structures instead.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 2e1a2988b7e1..be2debed70d2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -238,7 +238,6 @@ extern int __must_check class_device_create_file(struct class_device *,
  * @devt: for internal use by the driver core only.
  * @node: for internal use by the driver core only.
  * @kobj: for internal use by the driver core only.
- * @devt_attr: for internal use by the driver core only.
  * @groups: optional additional groups to be created
  * @dev: if set, a symlink to the struct device is created in the sysfs
  * directory for this struct class device.
@@ -263,8 +262,6 @@ struct class_device {
 	struct kobject		kobj;
 	struct class		* class;	/* required */
 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
-	struct class_device_attribute *devt_attr;
-	struct class_device_attribute uevent_attr;
 	struct device		* dev;		/* not necessary, but nice to have */
 	void			* class_data;	/* class-specific data */
 	struct class_device	*parent;	/* parent of this child device, if there is one */
@@ -419,8 +416,6 @@ struct device {
 	struct device_type	*type;
 	unsigned		is_registered:1;
 	unsigned		uevent_suppress:1;
-	struct device_attribute uevent_attr;
-	struct device_attribute *devt_attr;
 
 	struct semaphore	sem;	/* semaphore to synchronize calls to
 					 * its driver.
-- 
cgit v1.2.3


From b402d72cf7b338a074e3c12b305ec79284e18845 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 04:27:21 +0900
Subject: sysfs: rename sysfs_dirent->s_type to s_flags and make room for flags

Rename sysfs_dirent->s_type to s_flags, pack type into lower eight
bits and reserve the rest for flags.  sysfs_type() can be used to access
the type.  All existing sd->s_type accesses are converted to use
sysfs_type().  While at it, type test is changed to equality test
instead of bit-and test where appropriate.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 161e19aa2b4f..58135509023e 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -74,6 +74,7 @@ struct sysfs_ops {
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
 };
 
+#define SYSFS_TYPE_MASK		0x00ff
 #define SYSFS_ROOT		0x0001
 #define SYSFS_DIR		0x0002
 #define SYSFS_KOBJ_ATTR 	0x0004
@@ -82,6 +83,8 @@ struct sysfs_ops {
 #define SYSFS_NOT_PINNED	(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
 #define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
 
+#define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK
+
 #ifdef CONFIG_SYSFS
 
 extern int sysfs_schedule_callback(struct kobject *kobj,
-- 
cgit v1.2.3


From 380e6fbb729a55b73d5d8409551474884e0d93fc Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 04:27:22 +0900
Subject: sysfs: implement SYSFS_FLAG_REMOVED flag

Implement SYSFS_FLAG_REMOVED flag which currently is used only to
improve sanity check in sysfs_deactivate().  The flag will be used to
make directory entries reclaimable.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 58135509023e..2a6df6444e69 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -84,6 +84,7 @@ struct sysfs_ops {
 #define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
 
 #define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK
+#define SYSFS_FLAG_REMOVED	0x0100
 
 #ifdef CONFIG_SYSFS
 
-- 
cgit v1.2.3


From 608e266a2d4e62c1b98c1c573064b6afe8c06a58 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 04:27:22 +0900
Subject: sysfs: make kobj point to sysfs_dirent instead of dentry

As kobj sysfs dentries and inodes are gonna be made reclaimable,
dentry can't be used as naming token for sysfs file/directory, replace
kobj->dentry with kobj->sd.  The only external interface change is
shadow directory handling.  All other changes are contained in kobj
and sysfs.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h |  9 +++++----
 include/linux/sysfs.h   | 19 +++++++++++--------
 2 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index c288e41ba331..06cbf41d32d2 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -55,7 +55,7 @@ struct kobject {
 	struct kobject		* parent;
 	struct kset		* kset;
 	struct kobj_type	* ktype;
-	struct dentry		* dentry;
+	struct sysfs_dirent	* sd;
 	wait_queue_head_t	poll;
 };
 
@@ -71,13 +71,14 @@ extern void kobject_init(struct kobject *);
 extern void kobject_cleanup(struct kobject *);
 
 extern int __must_check kobject_add(struct kobject *);
-extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *);
+extern int __must_check kobject_shadow_add(struct kobject *kobj,
+					   struct sysfs_dirent *shadow_parent);
 extern void kobject_del(struct kobject *);
 
 extern int __must_check kobject_rename(struct kobject *, const char *new_name);
 extern int __must_check kobject_shadow_rename(struct kobject *kobj,
-						struct dentry *new_parent,
-						const char *new_name);
+					      struct sysfs_dirent *new_parent,
+					      const char *new_name);
 extern int __must_check kobject_move(struct kobject *, struct kobject *);
 
 extern int __must_check kobject_register(struct kobject *);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 2a6df6444e69..4c43030fae5d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -19,6 +19,7 @@ struct kobject;
 struct module;
 struct nameidata;
 struct dentry;
+struct sysfs_dirent;
 
 /* FIXME
  * The *owner field is no longer used, but leave around
@@ -92,13 +93,14 @@ extern int sysfs_schedule_callback(struct kobject *kobj,
 		void (*func)(void *), void *data, struct module *owner);
 
 extern int __must_check
-sysfs_create_dir(struct kobject *, struct dentry *);
+sysfs_create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent_sd);
 
 extern void
 sysfs_remove_dir(struct kobject *);
 
 extern int __must_check
-sysfs_rename_dir(struct kobject *, struct dentry *, const char *new_name);
+sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
+		 const char *new_name);
 
 extern int __must_check
 sysfs_move_dir(struct kobject *, struct kobject *);
@@ -138,8 +140,8 @@ void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
 extern int sysfs_make_shadowed_dir(struct kobject *kobj,
 	void * (*follow_link)(struct dentry *, struct nameidata *));
-extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj);
-extern void sysfs_remove_shadow_dir(struct dentry *dir);
+extern struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj);
+extern void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd);
 
 extern int __must_check sysfs_init(void);
 
@@ -151,7 +153,8 @@ static inline int sysfs_schedule_callback(struct kobject *kobj,
 	return -ENOSYS;
 }
 
-static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow)
+static inline int sysfs_create_dir(struct kobject *kobj,
+				   struct sysfs_dirent *shadow_parent_sd)
 {
 	return 0;
 }
@@ -161,9 +164,9 @@ static inline void sysfs_remove_dir(struct kobject * k)
 	;
 }
 
-static inline int sysfs_rename_dir(struct kobject * k,
-					struct dentry *new_parent,
-					const char *new_name)
+static inline int sysfs_rename_dir(struct kobject *kobj,
+				   struct sysfs_dirent *new_parent_sd,
+				   const char *new_name)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 51225039f3cf9d250596d1344494b293274b9169 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 14 Jun 2007 04:27:25 +0900
Subject: sysfs: make directory dentries and inodes reclaimable

This patch makes dentries and inodes for sysfs directories
reclaimable.

* sysfs_notify() is modified to walk sysfs_dirent tree instead of
  dentry tree.

* sysfs_update_file() and sysfs_chmod_file() use sysfs_get_dentry() to
  grab the victim dentry.

* sysfs_rename_dir() and sysfs_move_dir() grab all dentries using
  sysfs_get_dentry() on startup.

* Dentries for all shadowed directories are pinned in memory to serve
  as lookup start point.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 4c43030fae5d..2f58ca1af770 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -81,7 +81,6 @@ struct sysfs_ops {
 #define SYSFS_KOBJ_ATTR 	0x0004
 #define SYSFS_KOBJ_BIN_ATTR	0x0008
 #define SYSFS_KOBJ_LINK 	0x0020
-#define SYSFS_NOT_PINNED	(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
 #define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
 
 #define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK
-- 
cgit v1.2.3


From 91a6902958f052358899f58683d44e36228d85c2 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Sat, 9 Jun 2007 13:57:22 +0800
Subject: sysfs: add parameter "struct bin_attribute *" in .read/.write methods
 for sysfs binary attributes

Well, first of all, I don't want to change so many files either.

What I do:
Adding a new parameter "struct bin_attribute *" in the
.read/.write methods for the sysfs binary attributes.

In fact, only the four lines change in fs/sysfs/bin.c and
include/linux/sysfs.h do the real work.
But I have to update all the files that use binary attributes
to make them compatible with the new .read and .write methods.
I'm not sure if I missed any. :(

Why I do this:
For a sysfs attribute, we can get a pointer pointing to the
struct attribute in the .show/.store method,
while we can't do this for the binary attributes.
I don't know why this is different, but this does make it not
so handy to use the binary attributes as the regular ones.
So I think this patch is reasonable. :)

Who benefits from it:
The patch that exposes ACPI tables in sysfs
requires such an improvement.
All the table binary attributes share the same .read method.
Parameter "struct bin_attribute *" is used to get
the table signature and instance number which are used to
distinguish different ACPI table binary attributes.

Without this parameter, we need to offer different .read methods
for different ACPI table binary attributes.
This is impossible as there are various ACPI tables on different
platforms, and we don't know what they are until they are loaded.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 2f58ca1af770..be8228e50a27 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -64,8 +64,10 @@ struct bin_attribute {
 	struct attribute	attr;
 	size_t			size;
 	void			*private;
-	ssize_t (*read)(struct kobject *, char *, loff_t, size_t);
-	ssize_t (*write)(struct kobject *, char *, loff_t, size_t);
+	ssize_t (*read)(struct kobject *, struct bin_attribute *,
+			char *, loff_t, size_t);
+	ssize_t (*write)(struct kobject *, struct bin_attribute *,
+			 char *, loff_t, size_t);
 	int (*mmap)(struct kobject *, struct bin_attribute *attr,
 		    struct vm_area_struct *vma);
 };
-- 
cgit v1.2.3


From 29578624e354f56143d92510fff33a8b2aaa2c03 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch@oracle.com>
Date: Wed, 11 Jul 2007 19:32:02 -0700
Subject: [NET]: Fix races in net_rx_action vs netpoll.

Keep netpoll/poll_napi from messing with the poll_list.
Only net_rx_action is allowed to manipulate the list.

Signed-off-by: Olaf Kirch <olaf.kirch@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8590d685d935..79cc3dab4be7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -261,6 +261,8 @@ enum netdev_state_t
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
 	__LINK_STATE_QDISC_RUNNING,
+	/* Set by the netpoll NAPI code */
+	__LINK_STATE_POLL_LIST_FROZEN,
 };
 
 
@@ -1014,6 +1016,14 @@ static inline void netif_rx_complete(struct net_device *dev)
 {
 	unsigned long flags;
 
+#ifdef CONFIG_NETPOLL
+	/* Prevent race with netpoll - yes, this is a kludge.
+	 * But at least it doesn't penalize the non-netpoll
+	 * code path. */
+	if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state))
+		return;
+#endif
+
 	local_irq_save(flags);
 	__netif_rx_complete(dev);
 	local_irq_restore(flags);
-- 
cgit v1.2.3


From 8c979c26a0f093c13290320edda799d8335e50ae Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:45:24 -0700
Subject: [VLAN]: Fix MAC address handling

The VLAN MAC address handling is broken in multiple ways. When the address
differs when setting it, the real device is put in promiscuous mode twice,
but never taken out again. Additionally it doesn't resync when the real
device's address is changed and needlessly puts it in promiscuous mode when
the vlan device is still down.

Fix by moving address handling to vlan_dev_open/vlan_dev_stop and properly
deal with address changes in the device notifier. Also switch to
dev_unicast_add (which needs the exact same handling).

Since the set_mac_address handler is identical to the generic ethernet one
with these changes, kill it and use ether_setup().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c7912876a210..61a57dc2ac99 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -135,6 +135,7 @@ struct vlan_dev_info {
 	int old_allmulti;               /* similar to above. */
 	int old_promiscuity;            /* similar to above. */
 	struct net_device *real_dev;    /* the underlying device/interface */
+	unsigned char real_dev_addr[ETH_ALEN];
 	struct proc_dir_entry *dent;    /* Holds the proc data */
 	unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */
 	unsigned long cnt_encap_on_xmit;      /* How many times did we have to encapsulate the skb on TX. */
-- 
cgit v1.2.3


From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jul 2007 19:46:26 -0700
Subject: [NET_SCHED]: ematch: module autoloading

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index c3f01b3085a4..30b8571e6b34 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -403,16 +403,13 @@ enum
  *   1..32767		Reserved for ematches inside kernel tree
  *   32768..65535	Free to use, not reliable
  */
-enum
-{
-	TCF_EM_CONTAINER,
-	TCF_EM_CMP,
-	TCF_EM_NBYTE,
-	TCF_EM_U32,
-	TCF_EM_META,
-	TCF_EM_TEXT,
-	__TCF_EM_MAX
-};
+#define	TCF_EM_CONTAINER	0
+#define	TCF_EM_CMP		1
+#define	TCF_EM_NBYTE		2
+#define	TCF_EM_U32		3
+#define	TCF_EM_META		4
+#define	TCF_EM_TEXT		5
+#define	TCF_EM_MAX		5
 
 enum
 {
-- 
cgit v1.2.3


From ed0321895182ffb6ecf210e066d87911b270d587 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 28 Jun 2007 15:55:21 -0400
Subject: security: Protection for exploiting null dereference using mmap

Add a new security check on mmap operations to see if the user is attempting
to mmap to a low area of the address space.  The amount of space protected is
indicated by the new proc tunable /proc/sys/vm/mmap_min_addr and defaults to
0, preserving existing behavior.

This patch uses a new SELinux security class "memprotect."  Policy already
contains a number of allow rules like a_t self:process * (unconfined_t being
one of them) which mean that putting this check in the process class (its
best current fit) would make it useless as all user processes, which we also
want to protect against, would be allowed. By taking the memprotect name of
the new class it will also make it possible for us to move some of the other
memory protect permissions out of 'process' and into the new class next time
we bump the policy version number (which I also think is a good future idea)

Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 9eb9e0fe0331..c11dc8aa0351 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -71,6 +71,7 @@ struct xfrm_user_sec_ctx;
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
+extern unsigned long mmap_min_addr;
 /*
  * Values used in the task_security_ops calls
  */
@@ -1241,8 +1242,9 @@ struct security_operations {
 	int (*file_ioctl) (struct file * file, unsigned int cmd,
 			   unsigned long arg);
 	int (*file_mmap) (struct file * file,
-			  unsigned long reqprot,
-			  unsigned long prot, unsigned long flags);
+			  unsigned long reqprot, unsigned long prot,
+			  unsigned long flags, unsigned long addr,
+			  unsigned long addr_only);
 	int (*file_mprotect) (struct vm_area_struct * vma,
 			      unsigned long reqprot,
 			      unsigned long prot);
@@ -1814,9 +1816,12 @@ static inline int security_file_ioctl (struct file *file, unsigned int cmd,
 
 static inline int security_file_mmap (struct file *file, unsigned long reqprot,
 				      unsigned long prot,
-				      unsigned long flags)
+				      unsigned long flags,
+				      unsigned long addr,
+				      unsigned long addr_only)
 {
-	return security_ops->file_mmap (file, reqprot, prot, flags);
+	return security_ops->file_mmap (file, reqprot, prot, flags, addr,
+					addr_only);
 }
 
 static inline int security_file_mprotect (struct vm_area_struct *vma,
@@ -2489,7 +2494,9 @@ static inline int security_file_ioctl (struct file *file, unsigned int cmd,
 
 static inline int security_file_mmap (struct file *file, unsigned long reqprot,
 				      unsigned long prot,
-				      unsigned long flags)
+				      unsigned long flags,
+				      unsigned long addr,
+				      unsigned long addr_only)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From d64f73be1b59b9556de0a8fbd4f1a003c6a45a5c Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Thu, 12 Jul 2007 14:12:28 +0200
Subject: i2c: Add kernel documentation

Generate I2C kerneldoc; fix various glitches and add "context" sections to
that documentation.  Most I2C and SMBus functions still have no kerneldoc.

Let me suggest providing kerneldoc for all the i2c_smbus_*() functions as
a small and mostly self-contained project for anyone so inclined.  :)

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cae7d618030c..a24e267fd189 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -150,15 +150,20 @@ struct i2c_driver {
 
 /**
  * struct i2c_client - represent an I2C slave device
+ * @flags: I2C_CLIENT_TEN indicates the device uses a ten bit chip address;
+ *	I2C_CLIENT_PEC indicates it uses SMBus Packet Error Checking
  * @addr: Address used on the I2C bus connected to the parent adapter.
  * @name: Indicates the type of the device, usually a chip name that's
  *	generic enough to hide second-sourcing and compatible revisions.
+ * @adapter: manages the bus segment hosting this I2C device
  * @dev: Driver model device node for the slave.
+ * @irq: indicates the IRQ generated by this device (if any)
  * @driver_name: Identifies new-style driver used with this device; also
  *	used as the module name for hotplug/coldplug modprobe support.
  *
  * An i2c_client identifies a single device (i.e. chip) connected to an
- * i2c bus. The behaviour is defined by the routines of the driver.
+ * i2c bus. The behaviour exposed to Linux is defined by the driver
+ * managing the device.
  */
 struct i2c_client {
 	unsigned short flags;		/* div., see below		*/
@@ -201,7 +206,7 @@ static inline void i2c_set_clientdata (struct i2c_client *dev, void *data)
  * @addr: stored in i2c_client.addr
  * @platform_data: stored in i2c_client.dev.platform_data
  * @irq: stored in i2c_client.irq
-
+ *
  * I2C doesn't actually support hardware probing, although controllers and
  * devices may be able to use I2C_SMBUS_QUICK to tell whether or not there's
  * a device at a given address.  Drivers commonly need more information than
@@ -210,7 +215,7 @@ static inline void i2c_set_clientdata (struct i2c_client *dev, void *data)
  * i2c_board_info is used to build tables of information listing I2C devices
  * that are present.  This information is used to grow the driver model tree
  * for "new style" I2C drivers.  For mainboards this is done statically using
- * i2c_register_board_info(), where @bus_num represents an adapter that isn't
+ * i2c_register_board_info(); bus numbers identify adapters that aren't
  * yet available.  For add-on boards, i2c_new_device() does this dynamically
  * with the adapter already known.
  */
-- 
cgit v1.2.3


From d75d53cd571c02990d56e72f615ab11e943772f9 Mon Sep 17 00:00:00 2001
From: "Mark M. Hoffman" <mhoffman@lightlink.com>
Date: Thu, 12 Jul 2007 14:12:28 +0200
Subject: i2c: Fix sparse warning in i2c.h

Kill a sparse warning by un-nesting two container_of() calls.

Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a24e267fd189..44f2ecf47d9f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -185,7 +185,8 @@ struct i2c_client {
 
 static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj)
 {
-	return to_i2c_client(container_of(kobj, struct device, kobj));
+	struct device * const dev = container_of(kobj, struct device, kobj);
+	return to_i2c_client(dev);
 }
 
 static inline void *i2c_get_clientdata (struct i2c_client *dev)
-- 
cgit v1.2.3


From 4b2643d7d9bdcd776749e17f73c168ddf02e93cb Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 12 Jul 2007 14:12:29 +0200
Subject: i2c: Fix the i2c_smbus_read_i2c_block_data() prototype

Let the drivers specify how many bytes they want to read with
i2c_smbus_read_i2c_block_data(). So far, the block count was
hard-coded to I2C_SMBUS_BLOCK_MAX (32), which did not make much sense.
Many driver authors complained about this before, and I believe it's
about time to fix it. Right now, authors have to do technically stupid
things, such as individual byte reads or full-fledged I2C messaging,
to work around the problem. We do not want to encourage that.

I even found that some bus drivers (e.g. i2c-amd8111) already
implemented I2C block read the "right" way, that is, they didn't
follow the old, broken standard. The fact that it was never noticed
before just shows how little i2c_smbus_read_i2c_block_data() was used,
which isn't that surprising given how broken its prototype was so far.

There are some obvious compatibility considerations:
* This changes the i2c_smbus_read_i2c_block_data() prototype. Users
  outside the kernel tree will notice at compilation time, and will
  have to update their code.
* User-space has access to i2c_smbus_xfer() directly using i2c-dev, so
  the changed expectations would affect tools such as i2cdump. In order
  to preserve binary compatibility, we give I2C_SMBUS_I2C_BLOCK_DATA
  a new numeric value, and define I2C_SMBUS_I2C_BLOCK_BROKEN with the
  old numeric value. When i2c-dev receives a transaction with the
  old value, it can convert it to the new format on the fly.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 44f2ecf47d9f..2eaba21b9b1a 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -90,7 +90,7 @@ extern s32 i2c_smbus_write_block_data(struct i2c_client * client,
 				      const u8 *values);
 /* Returns the number of read bytes */
 extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client,
-					 u8 command, u8 *values);
+					 u8 command, u8 length, u8 *values);
 extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
 					  u8 command, u8 length,
 					  const u8 *values);
@@ -524,8 +524,9 @@ union i2c_smbus_data {
 #define I2C_SMBUS_WORD_DATA	    3
 #define I2C_SMBUS_PROC_CALL	    4
 #define I2C_SMBUS_BLOCK_DATA	    5
-#define I2C_SMBUS_I2C_BLOCK_DATA    6
+#define I2C_SMBUS_I2C_BLOCK_BROKEN  6
 #define I2C_SMBUS_BLOCK_PROC_CALL   7		/* SMBus 2.0 */
+#define I2C_SMBUS_I2C_BLOCK_DATA    8
 
 
 /* ----- commands for the ioctl like i2c_command call:
-- 
cgit v1.2.3


From c29c22218b99dad95f7cd0281415a854aeee805c Mon Sep 17 00:00:00 2001
From: Henry Su <henry.su@amd.com>
Date: Thu, 12 Jul 2007 14:12:29 +0200
Subject: i2c-piix4: Add support for the ATI SB700

Add the SMBus device ID for ATI SB700.

Signed-off-by: Henry Su <Henry.su@amd.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 75c4d4d06892..8300001e9078 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -371,6 +371,7 @@
 #define PCI_DEVICE_ID_ATI_IXP600_SMBUS	0x4385
 #define PCI_DEVICE_ID_ATI_IXP600_IDE	0x438c
 #define PCI_DEVICE_ID_ATI_IXP700_SATA	0x4390
+#define PCI_DEVICE_ID_ATI_IXP700_SMBUS	0x4395
 #define PCI_DEVICE_ID_ATI_IXP700_IDE	0x439c
 
 #define PCI_VENDOR_ID_VLSI		0x1004
-- 
cgit v1.2.3


From b9cdad74883a797952de52464d118d685cafc05a Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 12 Jul 2007 14:12:31 +0200
Subject: i2c: New bus driver for the TAOS evaluation modules

This is a new I2C bus driver for the TAOS evaluation modules. Developed
and tested on the TAOS TSL2550 EVM.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/serio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index 1ebf0455e224..d9377ce9ffd1 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -209,5 +209,6 @@ static inline void serio_unpin_driver(struct serio *serio)
 #define SERIO_PENMOUNT	0x31
 #define SERIO_TOUCHRIGHT	0x32
 #define SERIO_TOUCHWIN	0x33
+#define SERIO_TAOSEVM	0x34
 
 #endif
-- 
cgit v1.2.3


From e087db510cd96a75a614f6f6fcd5499ab21cb087 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 11 Jul 2007 12:18:31 -0700
Subject: Clean up struct screen_info (<linux/screen_info.h>)

struct screen_info has unaligned members, it needs to be packed.
In the process, fix the naming of some of the members, which don't
belong in this structure but are part of it anyway.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/screen_info.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index b02308ee7667..3ee412bc00ec 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -10,7 +10,7 @@
 struct screen_info {
 	u8  orig_x;		/* 0x00 */
 	u8  orig_y;		/* 0x01 */
-	u16 dontuse1;		/* 0x02 -- EXT_MEM_K sits here */
+	u16 ext_mem_k;		/* 0x02 */
 	u16 orig_video_page;	/* 0x04 */
 	u8  orig_video_mode;	/* 0x06 */
 	u8  orig_video_cols;	/* 0x07 */
@@ -27,7 +27,7 @@ struct screen_info {
 	u16 lfb_depth;		/* 0x16 */
 	u32 lfb_base;		/* 0x18 */
 	u32 lfb_size;		/* 0x1c */
-	u16 dontuse2, dontuse3;	/* 0x20 -- CL_MAGIC and CL_OFFSET here */
+	u16 cl_magic, cl_offset; /* 0x20 */
 	u16 lfb_linelength;	/* 0x24 */
 	u8  red_size;		/* 0x26 */
 	u8  red_pos;		/* 0x27 */
@@ -42,9 +42,8 @@ struct screen_info {
 	u16 pages;		/* 0x32 */
 	u16 vesa_attributes;	/* 0x34 */
 	u32 capabilities;       /* 0x36 */
-				/* 0x3a -- 0x3b reserved for future expansion */
-				/* 0x3c -- 0x3f micro stack for relocatable kernels */
-};
+	u8  _reserved[6];	/* 0x3a */
+} __attribute__((packed));
 
 extern struct screen_info screen_info;
 
-- 
cgit v1.2.3


From c39736823232bc3ca113c8228fa852c09fba300e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 11 Jul 2007 12:18:58 -0700
Subject: Remove old i386 setup code

This removes the old i386 setup code.  This is done as a separate patch
to avoid breaking git bisect as some of the i386 code was also used by
the old x86-64 code.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/edd.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/edd.h b/include/linux/edd.h
index b2b3e68aa512..7b647822d6dc 100644
--- a/include/linux/edd.h
+++ b/include/linux/edd.h
@@ -49,10 +49,6 @@
 #define EDD_MBR_SIG_MAX 16        /* max number of signatures to store */
 #define EDD_MBR_SIG_NR_BUF 0x1ea  /* addr of number of MBR signtaures at EDD_MBR_SIG_BUF
 				     in boot_params - treat this as 1 byte  */
-#define EDD_CL_EQUALS   0x3d646465     /* "edd=" */
-#define EDD_CL_OFF      0x666f         /* "of" for off  */
-#define EDD_CL_SKIP     0x6b73         /* "sk" for skipmbr */
-#define EDD_CL_ON       0x6e6f	       /* "on" for on */
 
 #ifndef __ASSEMBLY__
 
-- 
cgit v1.2.3


From 5628221caf88e2a052782b042e12da7cd34111b0 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Tue, 10 Jul 2007 19:32:10 +0200
Subject: [PATCH] mac80211: ERP IE handling improvements

The "protection needed" flag is currently parsed out of the ERP IE in
beacons. This patch allows the ERP IE to be available at association time
and causes the appropriate actions to be performed earlier.

It is slightly complicated by the fact that most APs don't include the
ERP IE in association responses. To work around this, we store ERP
values in the ieee80211_sta_bss structure.

Also added some WLAN_ERP defines for use by upcoming patches.

Signed-off-by: Jiri Benc <jbenc@suse.cz>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ecd61e8438a5..272f8c8c90da 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -227,6 +227,17 @@ struct ieee80211_cts {
 #define WLAN_CAPABILITY_SHORT_SLOT_TIME	(1<<10)
 #define WLAN_CAPABILITY_DSSS_OFDM	(1<<13)
 
+/* 802.11g ERP information element */
+#define WLAN_ERP_NON_ERP_PRESENT (1<<0)
+#define WLAN_ERP_USE_PROTECTION (1<<1)
+#define WLAN_ERP_BARKER_PREAMBLE (1<<2)
+
+/* WLAN_ERP_BARKER_PREAMBLE values */
+enum {
+	WLAN_ERP_PREAMBLE_SHORT = 0,
+	WLAN_ERP_PREAMBLE_LONG = 1,
+};
+
 /* Status codes */
 enum ieee80211_statuscode {
 	WLAN_STATUS_SUCCESS = 0,
-- 
cgit v1.2.3


From 6a775e2ba4f7635849ade628e64723ab2beef0bc Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 21 Jun 2007 12:27:47 +0300
Subject: IB/mlx4: Implement query QP

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/linux/mlx4/qp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 10c57d279144..3968b943259a 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -282,6 +282,9 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
 		   int sqd_event, struct mlx4_qp *qp);
 
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+		  struct mlx4_qp_context *context);
+
 static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 {
 	return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
-- 
cgit v1.2.3


From 65541cb7cf353946ecd78016a453b453b8830656 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 21 Jun 2007 13:03:11 +0300
Subject: IB/mlx4: Implement query SRQ

Signed-off-by: Dotan Barak <dotanb@mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 8209387ee854..cfb78fb2c046 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -323,6 +323,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
 		   u64 db_rec, struct mlx4_srq *srq);
 void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
 int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark);
 
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
-- 
cgit v1.2.3


From ec22559e0b7a05283a3413bda5d177e42c950e23 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.de>
Date: Fri, 27 Apr 2007 20:54:57 +0200
Subject: USB: suspend support for usb serial

This implements generic suspend/resume support for usb serial.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/serial.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 32acbae28d24..e8b8928232c8 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -221,6 +221,9 @@ struct usb_serial_driver {
 	int (*port_probe) (struct usb_serial_port *port);
 	int (*port_remove) (struct usb_serial_port *port);
 
+	int (*suspend) (struct usb_serial *serial, pm_message_t message);
+	int (*resume) (struct usb_serial *serial);
+
 	/* serial function calls */
 	int  (*open)		(struct usb_serial_port *port, struct file * filp);
 	void (*close)		(struct usb_serial_port *port, struct file * filp);
@@ -249,6 +252,9 @@ extern void usb_serial_port_softint(struct usb_serial_port *port);
 extern int usb_serial_probe(struct usb_interface *iface, const struct usb_device_id *id);
 extern void usb_serial_disconnect(struct usb_interface *iface);
 
+extern int usb_serial_suspend(struct usb_interface *intf, pm_message_t message);
+extern int usb_serial_resume(struct usb_interface *intf);
+
 extern int ezusb_writememory (struct usb_serial *serial, int address, unsigned char *data, int length, __u8 bRequest);
 extern int ezusb_set_reset (struct usb_serial *serial, unsigned char reset_bit);
 
@@ -269,6 +275,7 @@ extern void usb_serial_put(struct usb_serial *serial);
 extern int usb_serial_generic_open (struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write (struct usb_serial_port *port, const unsigned char *buf, int count);
 extern void usb_serial_generic_close (struct usb_serial_port *port, struct file *filp);
+extern int usb_serial_generic_resume (struct usb_serial *serial);
 extern int usb_serial_generic_write_room (struct usb_serial_port *port);
 extern int usb_serial_generic_chars_in_buffer (struct usb_serial_port *port);
 extern void usb_serial_generic_read_bulk_callback (struct urb *urb);
-- 
cgit v1.2.3


From 0458d5b4c9cc4ca0f62625d0144ddc4b4bc97a3c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 4 May 2007 11:52:20 -0400
Subject: USB: add USB-Persist facility

This patch (as886) adds the controversial USB-persist facility,
allowing USB devices to persist across a power loss during system
suspend.

The facility is controlled by a new Kconfig option (with appropriate
warnings about the potential dangers); when the option is off the
behavior will remain the same as it is now.  But when the option is
on, people will be able to use suspend-to-disk and keep their USB
filesystems intact -- something particularly valuable for small
machines where the root filesystem is on a USB device!

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 56aa2ee21f1b..3d63e0c2dd70 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -403,6 +403,7 @@ struct usb_device {
 
 	unsigned auto_pm:1;		/* autosuspend/resume in progress */
 	unsigned do_remote_wakeup:1;	/* remote wakeup should be enabled */
+	unsigned reset_resume:1;	/* needs reset instead of resume */
 	unsigned autosuspend_disabled:1; /* autosuspend and autoresume */
 	unsigned autoresume_disabled:1;  /*  disabled by the user */
 #endif
@@ -819,7 +820,10 @@ struct usbdrv_wrap {
  * @pre_reset: Called by usb_reset_composite_device() when the device
  *	is about to be reset.
  * @post_reset: Called by usb_reset_composite_device() after the device
- *	has been reset.
+ *	has been reset, or in lieu of @resume following a reset-resume
+ *	(i.e., the device is reset instead of being resumed, as might
+ *	happen if power was lost).  The second argument tells which is
+ *	the reason.
  * @id_table: USB drivers use ID table to support hotplugging.
  *	Export this with MODULE_DEVICE_TABLE(usb,...).  This must be set
  *	or your driver's probe function will never get called.
@@ -861,7 +865,7 @@ struct usb_driver {
 	int (*resume) (struct usb_interface *intf);
 
 	void (*pre_reset) (struct usb_interface *intf);
-	void (*post_reset) (struct usb_interface *intf);
+	void (*post_reset) (struct usb_interface *intf, int reset_resume);
 
 	const struct usb_device_id *id_table;
 
-- 
cgit v1.2.3


From 6bc6cff52e0c4c4c876b1b8a5750041da61ad42b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 4 May 2007 11:53:03 -0400
Subject: USB: add RESET_RESUME device quirk

This patch (as888) adds a new USB device quirk for devices which are
unable to resume correctly.  By using the new code added for the
USB-persist facility, it is a simple matter to reset these devices
instead of resuming them.  To get things kicked off, a quirk entry is
added for the Philips PSC805.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/quirks.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 6bac8faacbc6..8da374caf582 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -9,3 +9,6 @@
 
 /* string descriptors must not be fetched using a 255-byte read */
 #define USB_QUIRK_STRING_FETCH_255	0x00000002
+
+/* device can't resume correctly so reset it instead */
+#define USB_QUIRK_RESET_RESUME		0x00000004
-- 
cgit v1.2.3


From 8538f96ae5aada1c04d69a993b20ad160b191d47 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Thu, 10 May 2007 00:32:24 +0100
Subject: USB: add USB_DEVICE_AND_INTERFACE_INFO for device matching

Recently, the USB device matching code stopped matching generic interface
matches against devices with vendor-specific device class values.

Some drivers now need to explicitly match USB device ID's (in addition to
generic interface info) to retain the same behaviour as before. This new macro,
suggested by Alan Stern, makes the explicit device/interface matching a little
simpler for those users.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3d63e0c2dd70..98e0338664fb 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -771,6 +771,28 @@ static inline int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor
 	.match_flags = USB_DEVICE_ID_MATCH_INT_INFO, .bInterfaceClass = (cl), \
 	.bInterfaceSubClass = (sc), .bInterfaceProtocol = (pr)
 
+/**
+ * USB_DEVICE_AND_INTERFACE_INFO - macro used to describe a specific usb device
+ * 		with a class of usb interfaces
+ * @vend: the 16 bit USB Vendor ID
+ * @prod: the 16 bit USB Product ID
+ * @cl: bInterfaceClass value
+ * @sc: bInterfaceSubClass value
+ * @pr: bInterfaceProtocol value
+ *
+ * This macro is used to create a struct usb_device_id that matches a
+ * specific device with a specific class of interfaces.
+ *
+ * This is especially useful when explicitly matching devices that have
+ * vendor specific bDeviceClass values, but standards-compliant interfaces.
+ */
+#define USB_DEVICE_AND_INTERFACE_INFO(vend,prod,cl,sc,pr) \
+	.match_flags = USB_DEVICE_ID_MATCH_INT_INFO \
+		| USB_DEVICE_ID_MATCH_DEVICE, \
+	.idVendor = (vend), .idProduct = (prod), \
+	.bInterfaceClass = (cl), \
+	.bInterfaceSubClass = (sc), .bInterfaceProtocol = (pr)
+
 /* ----------------------------------------------------------------------- */
 
 /* Stuff for dynamic usb ids */
-- 
cgit v1.2.3


From a5262dcfda9163ca1f8a64349a6f7ba640ac1dc2 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Mon, 14 May 2007 19:36:41 -0700
Subject: USB: export <linux/usb_gadgetfs> as <linux/usb/gadgetfs.h>

Make sure gadgetfs userspace interface is properly exported:

 - Move <linux/usb_gadgetfs.h> to <linux/usb/gadgetfs.h>;
 - Export it using Kbuild;
 - Add an #include guard;
 - Correct some internal documentation;
 - Update struct layout so it's the same on 32/64 bit kernels.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/Kbuild     |  1 +
 include/linux/usb/gadgetfs.h | 81 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb_gadgetfs.h | 75 ----------------------------------------
 3 files changed, 82 insertions(+), 75 deletions(-)
 create mode 100644 include/linux/usb/gadgetfs.h
 delete mode 100644 include/linux/usb_gadgetfs.h

(limited to 'include/linux')

diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
index 43f160cfe003..6ce42bf9f743 100644
--- a/include/linux/usb/Kbuild
+++ b/include/linux/usb/Kbuild
@@ -1,5 +1,6 @@
 unifdef-y += audio.h
 unifdef-y += cdc.h
 unifdef-y += ch9.h
+unifdef-y += gadgetfs.h
 unifdef-y += midi.h
 
diff --git a/include/linux/usb/gadgetfs.h b/include/linux/usb/gadgetfs.h
new file mode 100644
index 000000000000..e8654c338729
--- /dev/null
+++ b/include/linux/usb/gadgetfs.h
@@ -0,0 +1,81 @@
+#ifndef __LINUX_USB_GADGETFS_H
+#define __LINUX_USB_GADGETFS_H
+
+#include <asm/types.h>
+#include <asm/ioctl.h>
+
+#include <linux/usb/ch9.h>
+
+/*
+ * Filesystem based user-mode API to USB Gadget controller hardware
+ *
+ * Other than ep0 operations, most things are done by read() and write()
+ * on endpoint files found in one directory.  They are configured by
+ * writing descriptors, and then may be used for normal stream style
+ * i/o requests.  When ep0 is configured, the device can enumerate;
+ * when it's closed, the device disconnects from usb.  Operations on
+ * ep0 require ioctl() operations.
+ *
+ * Configuration and device descriptors get written to /dev/gadget/$CHIP,
+ * which may then be used to read usb_gadgetfs_event structs.  The driver
+ * may activate endpoints as it handles SET_CONFIGURATION setup events,
+ * or earlier; writing endpoint descriptors to /dev/gadget/$ENDPOINT
+ * then performing data transfers by reading or writing.
+ */
+
+/*
+ * Events are delivered on the ep0 file descriptor, when the user mode driver
+ * reads from this file descriptor after writing the descriptors.  Don't
+ * stop polling this descriptor.
+ */
+
+enum usb_gadgetfs_event_type {
+	GADGETFS_NOP = 0,
+
+	GADGETFS_CONNECT,
+	GADGETFS_DISCONNECT,
+	GADGETFS_SETUP,
+	GADGETFS_SUSPEND,
+	// and likely more !
+};
+
+/* NOTE:  this structure must stay the same size and layout on
+ * both 32-bit and 64-bit kernels.
+ */
+struct usb_gadgetfs_event {
+	union {
+		// NOP, DISCONNECT, SUSPEND: nothing
+		// ... some hardware can't report disconnection
+
+		// CONNECT: just the speed
+		enum usb_device_speed	speed;
+
+		// SETUP: packet; DATA phase i/o precedes next event
+		// (setup.bmRequestType & USB_DIR_IN) flags direction
+		// ... includes SET_CONFIGURATION, SET_INTERFACE
+		struct usb_ctrlrequest	setup;
+	} u;
+	enum usb_gadgetfs_event_type	type;
+};
+
+
+/* endpoint ioctls */
+
+/* IN transfers may be reported to the gadget driver as complete
+ *	when the fifo is loaded, before the host reads the data;
+ * OUT transfers may be reported to the host's "client" driver as
+ *	complete when they're sitting in the FIFO unread.
+ * THIS returns how many bytes are "unclaimed" in the endpoint fifo
+ * (needed for precise fault handling, when the hardware allows it)
+ */
+#define	GADGETFS_FIFO_STATUS	_IO('g',1)
+
+/* discards any unclaimed data in the fifo. */
+#define	GADGETFS_FIFO_FLUSH	_IO('g',2)
+
+/* resets endpoint halt+toggle; used to implement set_interface.
+ * some hardware (like pxa2xx) can't support this.
+ */
+#define	GADGETFS_CLEAR_HALT	_IO('g',3)
+
+#endif /* __LINUX_USB_GADGETFS_H */
diff --git a/include/linux/usb_gadgetfs.h b/include/linux/usb_gadgetfs.h
deleted file mode 100644
index 8086d5a9b94e..000000000000
--- a/include/linux/usb_gadgetfs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-
-#include <asm/types.h>
-#include <asm/ioctl.h>
-
-#include <linux/usb/ch9.h>
-
-/*
- * Filesystem based user-mode API to USB Gadget controller hardware
- *
- * Almost everything can be done with only read and write operations,
- * on endpoint files found in one directory.  They are configured by
- * writing descriptors, and then may be used for normal stream style
- * i/o requests.  When ep0 is configured, the device can enumerate;
- * when it's closed, the device disconnects from usb.
- *
- * Configuration and device descriptors get written to /dev/gadget/$CHIP,
- * which may then be used to read usb_gadgetfs_event structs.  The driver
- * may activate endpoints as it handles SET_CONFIGURATION setup events,
- * or earlier; writing endpoint descriptors to /dev/gadget/$ENDPOINT
- * then performing data transfers by reading or writing.
- */
-
-/*
- * Events are delivered on the ep0 file descriptor, if the user mode driver
- * reads from this file descriptor after writing the descriptors.  Don't
- * stop polling this descriptor, if you write that kind of driver.
- */
-
-enum usb_gadgetfs_event_type {
-	GADGETFS_NOP = 0,
-
-	GADGETFS_CONNECT,
-	GADGETFS_DISCONNECT,
-	GADGETFS_SETUP,
-	GADGETFS_SUSPEND,
-	// and likely more !
-};
-
-struct usb_gadgetfs_event {
-	enum usb_gadgetfs_event_type	type;
-	union {
-		// NOP, DISCONNECT, SUSPEND: nothing
-		// ... some hardware can't report disconnection
-
-		// CONNECT: just the speed
-		enum usb_device_speed	speed;
-
-		// SETUP: packet; DATA phase i/o precedes next event
-		// (setup.bmRequestType & USB_DIR_IN) flags direction 
-		// ... includes SET_CONFIGURATION, SET_INTERFACE
-		struct usb_ctrlrequest	setup;
-	} u;
-};
-
-
-/* endpoint ioctls */
-
-/* IN transfers may be reported to the gadget driver as complete
- * 	when the fifo is loaded, before the host reads the data;
- * OUT transfers may be reported to the host's "client" driver as
- * 	complete when they're sitting in the FIFO unread.
- * THIS returns how many bytes are "unclaimed" in the endpoint fifo
- * (needed for precise fault handling, when the hardware allows it)
- */
-#define	GADGETFS_FIFO_STATUS	_IO('g',1)
-
-/* discards any unclaimed data in the fifo. */
-#define	GADGETFS_FIFO_FLUSH	_IO('g',2)
-
-/* resets endpoint halt+toggle; used to implement set_interface.
- * some hardware (like pxa2xx) can't support this.
- */
-#define	GADGETFS_CLEAR_HALT	_IO('g',3)
-
-
-- 
cgit v1.2.3


From 51a2f077c44e559841b09de6da605b4d3ae40dad Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.de>
Date: Fri, 25 May 2007 13:40:56 +0200
Subject: USB: introduce usb_anchor

- introduction of usb_anchor and its methods

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 98e0338664fb..0873c6219efc 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1000,11 +1000,26 @@ struct usb_iso_packet_descriptor {
 
 struct urb;
 
+struct usb_anchor {
+	struct list_head urb_list;
+	wait_queue_head_t wait;
+	spinlock_t lock;
+};
+
+static inline void init_usb_anchor(struct usb_anchor *anchor)
+{
+	INIT_LIST_HEAD(&anchor->urb_list);
+	init_waitqueue_head(&anchor->wait);
+	spin_lock_init(&anchor->lock);
+}
+
 typedef void (*usb_complete_t)(struct urb *);
 
 /**
  * struct urb - USB Request Block
  * @urb_list: For use by current owner of the URB.
+ * @anchor_list: membership in the list of an anchor
+ * @anchor: to anchor URBs to a common mooring
  * @pipe: Holds endpoint number, direction, type, and more.
  *	Create these values with the eight macros available;
  *	usb_{snd,rcv}TYPEpipe(dev,endpoint), where the TYPE is "ctrl"
@@ -1177,6 +1192,8 @@ struct urb
 	/* public: documented fields in the urb that can be used by drivers */
 	struct list_head urb_list;	/* list head for use by the urb's
 					 * current owner */
+	struct list_head anchor_list;	/* the URB may be anchored by the driver */
+	struct usb_anchor *anchor;
 	struct usb_device *dev; 	/* (in) pointer to associated device */
 	unsigned int pipe;		/* (in) pipe information */
 	int status;			/* (return) non-ISO status */
@@ -1312,6 +1329,11 @@ extern struct urb *usb_get_urb(struct urb *urb);
 extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags);
 extern int usb_unlink_urb(struct urb *urb);
 extern void usb_kill_urb(struct urb *urb);
+extern void usb_kill_anchored_urbs(struct usb_anchor *anchor);
+extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor);
+extern void usb_unanchor_urb(struct urb *urb);
+extern int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor,
+					 unsigned int timeout);
 
 void *usb_buffer_alloc (struct usb_device *dev, size_t size,
 	gfp_t mem_flags, dma_addr_t *dma);
-- 
cgit v1.2.3


From f07600cf9eb3ee92777b2001e564faa413144a99 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 May 2007 15:38:16 -0400
Subject: USB: add reset_resume method

This patch (as918) introduces a new USB driver method: reset_resume.
It is called when a device needs to be reset as part of a resume
procedure (whether because of a device quirk or because of the
USB-Persist facility), thereby taking over a role formerly assigned to
the post_reset method.  As a consequence, post_reset no longer needs
an argument indicating whether it is being called as part of a
reset-resume.  This separation of functions makes the code clearer.

In addition, the pre_reset and post_reset method return types are
changed; they now must return an error code.  The return value is
unused at present, but at some later time we may unbind drivers and
re-probe if they encounter an error during reset handling.

The existing pre_reset and post_reset methods in the usbhid,
usb-storage, and hub drivers are updated to match the new
requirements.  For usbhid the post_reset routine is also used for
reset_resume (duplicate method pointers); for the other drivers a new
reset_resume routine is added.  The change to hub.c looks bigger than
it really is, because mark_children_for_reset_resume() gets moved down
next to the new hub_reset_resume() routine.

A minor change to usb-storage makes the usb_stor_report_bus_reset()
routine acquire the host lock instead of requiring the caller to hold
it already.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
CC: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0873c6219efc..bde8c65e2bfc 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -839,6 +839,8 @@ struct usbdrv_wrap {
  *	do (or don't) show up otherwise in the filesystem.
  * @suspend: Called when the device is going to be suspended by the system.
  * @resume: Called when the device is being resumed by the system.
+ * @reset_resume: Called when the suspended device has been reset instead
+ *	of being resumed.
  * @pre_reset: Called by usb_reset_composite_device() when the device
  *	is about to be reset.
  * @post_reset: Called by usb_reset_composite_device() after the device
@@ -885,9 +887,10 @@ struct usb_driver {
 
 	int (*suspend) (struct usb_interface *intf, pm_message_t message);
 	int (*resume) (struct usb_interface *intf);
+	int (*reset_resume)(struct usb_interface *intf);
 
-	void (*pre_reset) (struct usb_interface *intf);
-	void (*post_reset) (struct usb_interface *intf, int reset_resume);
+	int (*pre_reset)(struct usb_interface *intf);
+	int (*post_reset)(struct usb_interface *intf);
 
 	const struct usb_device_id *id_table;
 
-- 
cgit v1.2.3


From b41a60eca833d76593d4dac8a59f5c38714194ee Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 May 2007 15:39:33 -0400
Subject: USB: add power/persist device attribute

This patch (as920) adds an extra level of protection to the
USB-Persist facility.  Now it will apply by default only to hubs; for
all other devices the user must enable it explicitly by setting the
power/persist device attribute.

The disconnect_all_children() routine in hub.c has been removed and
its code placed inline.  This is the way it was originally as part of
hub_pre_reset(); the revised usage in hub_reset_resume() is
sufficiently different that the code can no longer be shared.
Likewise, mark_children_for_reset() is now inline as part of
hub_reset_resume().  The end result looks much cleaner than before.

The sysfs interface is updated to add the new attribute file, and
there are corresponding documentation updates.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index bde8c65e2bfc..efce9a4c511c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -404,6 +404,7 @@ struct usb_device {
 	unsigned auto_pm:1;		/* autosuspend/resume in progress */
 	unsigned do_remote_wakeup:1;	/* remote wakeup should be enabled */
 	unsigned reset_resume:1;	/* needs reset instead of resume */
+	unsigned persist_enabled:1;	/* USB_PERSIST enabled for this dev */
 	unsigned autosuspend_disabled:1; /* autosuspend and autoresume */
 	unsigned autoresume_disabled:1;  /*  disabled by the user */
 #endif
-- 
cgit v1.2.3


From 8b3b01c898a44c2fc7217eb579982b9d132113f5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 13 Jun 2007 08:02:11 +0200
Subject: USB: Add URB_FREE_BUFFER flag and the logic behind it

USB: Add URB_FREE_BUFFER flag for freeing the transfer buffer

In some cases it is not needed that the driver keeps track of the
transfer buffer of an URB. It can be simply freed along with the
URB itself when the reference count goes down to zero. The new
flag URB_FREE_BUFFER enables this behavior.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index efce9a4c511c..533c32374e01 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -994,6 +994,7 @@ extern int usb_disabled(void);
 #define URB_ZERO_PACKET		0x0040	/* Finish bulk OUT with short packet */
 #define URB_NO_INTERRUPT	0x0080	/* HINT: no non-error interrupt
 					 * needed */
+#define URB_FREE_BUFFER		0x0100	/* Free transfer buffer with the URB */
 
 struct usb_iso_packet_descriptor {
 	unsigned int offset;
-- 
cgit v1.2.3


From 165fe97ed6107d3cde63592d5ac36400a5eb9f6f Mon Sep 17 00:00:00 2001
From: "Craig W. Nadler" <craig@nadler.us>
Date: Fri, 15 Jun 2007 23:14:35 -0400
Subject: USB: add IAD support to usbfs and sysfs

USB_IAD: Adds support for USB Interface Association Descriptors.

This patch adds support to the USB host stack for parsing, storing, and
displaying Interface Association Descriptors. In /proc/bus/usb/devices
lines starting with A: show the fields in an IAD. In sysfs if an
interface on a USB device is referenced by an IAD the following files
will be added to the sysfs directory for that interface:
iad_bFirstInterface, iad_bInterfaceCount, iad_bFunctionClass,
iad_bFunctionSubClass, and iad_bFunctionProtocol.

Signed-off-by: Craig W. Nadler <craig@nadler.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 533c32374e01..7a60946df3b6 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -146,6 +146,10 @@ struct usb_interface {
 					 * active alternate setting */
 	unsigned num_altsetting;	/* number of alternate settings */
 
+	/* If there is an interface association descriptor then it will list
+	 * the associated interfaces */
+	struct usb_interface_assoc_descriptor *intf_assoc;
+
 	int minor;			/* minor number this interface is
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
@@ -175,6 +179,7 @@ void usb_put_intf(struct usb_interface *intf);
 
 /* this maximum is arbitrary */
 #define USB_MAXINTERFACES	32
+#define USB_MAXIADS		USB_MAXINTERFACES/2
 
 /**
  * struct usb_interface_cache - long-term representation of a device interface
@@ -245,6 +250,11 @@ struct usb_host_config {
 	struct usb_config_descriptor	desc;
 
 	char *string;		/* iConfiguration string, if present */
+
+	/* List of any Interface Association Descriptors in this
+	 * configuration. */
+	struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS];
+
 	/* the interfaces associated with this configuration,
 	 * stored in no particular order */
 	struct usb_interface *interface[USB_MAXINTERFACES];
-- 
cgit v1.2.3


From 9d8bab58b758cd5a96d368a8cc64111c9ab50407 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 1 Jul 2007 11:04:54 -0700
Subject: usb gadget stack: remove usb_ep_*_buffer(), part 1

Remove usb_ep_{alloc,free}_buffer() calls, for small dma-coherent buffers.
This patch just removes the interface and its users; later patches will
remove controller driver support.

  - This interface is invariably not implemented correctly in the
    controller drivers (e.g. using dma pools, a mechanism which
    post-dates the interface by several years).

  - At this point no gadget driver really *needs* to use it.  In
    current kernels, any driver that needs such a mechanism could
    allocate a dma pool themselves.

Removing this interface is thus a simplification and improvement.

Note that the gmidi.c driver had a bug in this area; fixed.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_gadget.h | 41 -----------------------------------------
 1 file changed, 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h
index e17186dbcdca..703fd84c46fc 100644
--- a/include/linux/usb_gadget.h
+++ b/include/linux/usb_gadget.h
@@ -234,47 +234,6 @@ usb_ep_free_request (struct usb_ep *ep, struct usb_request *req)
 	ep->ops->free_request (ep, req);
 }
 
-/**
- * usb_ep_alloc_buffer - allocate an I/O buffer
- * @ep:the endpoint associated with the buffer
- * @len:length of the desired buffer
- * @dma:pointer to the buffer's DMA address; must be valid
- * @gfp_flags:GFP_* flags to use
- *
- * Returns a new buffer, or null if one could not be allocated.
- * The buffer is suitably aligned for dma, if that endpoint uses DMA,
- * and the caller won't have to care about dma-inconsistency
- * or any hidden "bounce buffer" mechanism.  No additional per-request
- * DMA mapping will be required for such buffers.
- * Free it later with usb_ep_free_buffer().
- *
- * You don't need to use this call to allocate I/O buffers unless you
- * want to make sure drivers don't incur costs for such "bounce buffer"
- * copies or per-request DMA mappings.
- */
-static inline void *
-usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma,
-	gfp_t gfp_flags)
-{
-	return ep->ops->alloc_buffer (ep, len, dma, gfp_flags);
-}
-
-/**
- * usb_ep_free_buffer - frees an i/o buffer
- * @ep:the endpoint associated with the buffer
- * @buf:CPU view address of the buffer
- * @dma:the buffer's DMA address
- * @len:length of the buffer
- *
- * reverses the effect of usb_ep_alloc_buffer().
- * caller guarantees the buffer will no longer be accessed
- */
-static inline void
-usb_ep_free_buffer (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned len)
-{
-	ep->ops->free_buffer (ep, buf, dma, len);
-}
-
 /**
  * usb_ep_queue - queues (submits) an I/O request to an endpoint.
  * @ep:the endpoint associated with the request
-- 
cgit v1.2.3


From c67ab134ba9f83f9de86e58adfeaa14a9efa6e00 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 1 Jul 2007 12:21:00 -0700
Subject: usb gadget stack: remove usb_ep_*_buffer(), part 2

This patch removes controller driver infrastructure which supported
the now-removed usb_ep_{alloc,free}_buffer() calls.

As can be seen, many of the implementations of this were broken to
various degrees.  Many didn't properly return dma-coherent mappings;
those which did so were necessarily ugly because of bogosity in the
underlying dma_free_coherent() calls ... which on many platforms
can't be called from the same contexts (notably in_irq) from which
their dma_alloc_coherent() sibling can be called.

The main potential downside of removing this is that gadget drivers
wouldn't have specific knowledge that the controller drivers have:
endpoints that aren't dma-capable don't need any dma mappings at all.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_gadget.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h
index 703fd84c46fc..4f59b2aa8a9e 100644
--- a/include/linux/usb_gadget.h
+++ b/include/linux/usb_gadget.h
@@ -110,13 +110,6 @@ struct usb_ep_ops {
 		gfp_t gfp_flags);
 	void (*free_request) (struct usb_ep *ep, struct usb_request *req);
 
-	void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes,
-		dma_addr_t *dma, gfp_t gfp_flags);
-	void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma,
-		unsigned bytes);
-	// NOTE:  on 2.6, drivers may also use dma_map() and
-	// dma_sync_single_*() to directly manage dma overhead. 
-
 	int (*queue) (struct usb_ep *ep, struct usb_request *req,
 		gfp_t gfp_flags);
 	int (*dequeue) (struct usb_ep *ep, struct usb_request *req);
-- 
cgit v1.2.3


From 7405f74badf46b5d023c5d2b670b4471525f6c91 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 2 Jan 2007 11:10:43 -0700
Subject: dmaengine: refactor dmaengine around dma_async_tx_descriptor

The current dmaengine interface defines multiple routines per operation,
i.e. dma_async_memcpy_buf_to_buf, dma_async_memcpy_buf_to_page etc.  Adding
more operation types (xor, crc, etc) to this model would result in an
unmanageable number of method permutations.

	Are we really going to add a set of hooks for each DMA engine
	whizbang feature?
		- Jeff Garzik

The descriptor creation process is refactored using the new common
dma_async_tx_descriptor structure.  Instead of per driver
do_<operation>_<dest>_to_<src> methods, drivers integrate
dma_async_tx_descriptor into their private software descriptor and then
define a 'prep' routine per operation.  The prep routine allocates a
descriptor and ensures that the tx_set_src, tx_set_dest, tx_submit routines
are valid.  Descriptor creation and submission becomes:

struct dma_device *dev;
struct dma_chan *chan;
struct dma_async_tx_descriptor *tx;

tx = dev->device_prep_dma_<operation>(chan, len, int_flag)
tx->tx_set_src(dma_addr_t, tx, index /* for multi-source ops */)
tx->tx_set_dest(dma_addr_t, tx, index)
tx->tx_submit(tx)

In addition to the refactoring, dma_async_tx_descriptor also lays the
groundwork for defining cross-channel-operation dependencies, and a
callback facility for asynchronous notification of operation completion.

Changelog:
* drop dma mapping methods, suggested by Chris Leech
* fix ioat_dma_dependency_added, also caught by Andrew Morton
* fix dma_sync_wait, change from Andrew Morton
* uninline large functions, change from Andrew Morton
* add tx->callback = NULL to dmaengine calls to interoperate with async_tx
  calls
* hookup ioat_tx_submit
* convert channel capabilities to a 'cpumask_t like' bitmap
* removed DMA_TX_ARRAY_INIT, no longer needed
* checkpatch.pl fixes
* make set_src, set_dest, and tx_submit descriptor specific methods
* fixup git-ioat merge
* move group_list and phys to dma_async_tx_descriptor

Cc: Jeff Garzik <jeff@garzik.org>
Cc: Chris Leech <christopher.leech@intel.com>
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dmaengine.h | 237 +++++++++++++++++++++++++++++-----------------
 1 file changed, 149 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c94d8f1d62e5..3de1cf71031a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -21,13 +21,12 @@
 #ifndef DMAENGINE_H
 #define DMAENGINE_H
 
-#ifdef CONFIG_DMA_ENGINE
-
 #include <linux/device.h>
 #include <linux/uio.h>
 #include <linux/kref.h>
 #include <linux/completion.h>
 #include <linux/rcupdate.h>
+#include <linux/dma-mapping.h>
 
 /**
  * enum dma_event - resource PNP/power managment events
@@ -64,6 +63,31 @@ enum dma_status {
 	DMA_ERROR,
 };
 
+/**
+ * enum dma_transaction_type - DMA transaction types/indexes
+ */
+enum dma_transaction_type {
+	DMA_MEMCPY,
+	DMA_XOR,
+	DMA_PQ_XOR,
+	DMA_DUAL_XOR,
+	DMA_PQ_UPDATE,
+	DMA_ZERO_SUM,
+	DMA_PQ_ZERO_SUM,
+	DMA_MEMSET,
+	DMA_MEMCPY_CRC32C,
+	DMA_INTERRUPT,
+};
+
+/* last transaction type for creation of the capabilities mask */
+#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
+
+/**
+ * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
+ * See linux/cpumask.h
+ */
+typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
+
 /**
  * struct dma_chan_percpu - the per-CPU part of struct dma_chan
  * @refcount: local_t used for open-coded "bigref" counting
@@ -157,48 +181,106 @@ struct dma_client {
 	struct list_head	global_node;
 };
 
+typedef void (*dma_async_tx_callback)(void *dma_async_param);
+/**
+ * struct dma_async_tx_descriptor - async transaction descriptor
+ * ---dma generic offload fields---
+ * @cookie: tracking cookie for this transaction, set to -EBUSY if
+ *	this tx is sitting on a dependency list
+ * @ack: the descriptor can not be reused until the client acknowledges
+ *	receipt, i.e. has had a chance to establish any dependency chains
+ * @phys: physical address of the descriptor
+ * @tx_list: driver common field for operations that require multiple
+ *	descriptors
+ * @chan: target channel for this operation
+ * @tx_submit: set the prepared descriptor(s) to be executed by the engine
+ * @tx_set_dest: set a destination address in a hardware descriptor
+ * @tx_set_src: set a source address in a hardware descriptor
+ * @callback: routine to call after this operation is complete
+ * @callback_param: general parameter to pass to the callback routine
+ * ---async_tx api specific fields---
+ * @depend_list: at completion this list of transactions are submitted
+ * @depend_node: allow this transaction to be executed after another
+ *	transaction has completed, possibly on another channel
+ * @parent: pointer to the next level up in the dependency chain
+ * @lock: protect the dependency list
+ */
+struct dma_async_tx_descriptor {
+	dma_cookie_t cookie;
+	int ack;
+	dma_addr_t phys;
+	struct list_head tx_list;
+	struct dma_chan *chan;
+	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
+	void (*tx_set_dest)(dma_addr_t addr,
+		struct dma_async_tx_descriptor *tx, int index);
+	void (*tx_set_src)(dma_addr_t addr,
+		struct dma_async_tx_descriptor *tx, int index);
+	dma_async_tx_callback callback;
+	void *callback_param;
+	struct list_head depend_list;
+	struct list_head depend_node;
+	struct dma_async_tx_descriptor *parent;
+	spinlock_t lock;
+};
+
 /**
  * struct dma_device - info on the entity supplying DMA services
  * @chancnt: how many DMA channels are supported
  * @channels: the list of struct dma_chan
  * @global_node: list_head for global dma_device_list
+ * @cap_mask: one or more dma_capability flags
+ * @max_xor: maximum number of xor sources, 0 if no capability
  * @refcount: reference count
  * @done: IO completion struct
  * @dev_id: unique device ID
+ * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
  *	number of allocated descriptors
  * @device_free_chan_resources: release DMA channel's resources
- * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
- * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
- * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
- * @device_memcpy_complete: poll the status of an IOAT DMA transaction
- * @device_memcpy_issue_pending: push appended descriptors to hardware
+ * @device_prep_dma_memcpy: prepares a memcpy operation
+ * @device_prep_dma_xor: prepares a xor operation
+ * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_memset: prepares a memset operation
+ * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
+ * @device_dependency_added: async_tx notifies the channel about new deps
+ * @device_issue_pending: push pending transactions to hardware
  */
 struct dma_device {
 
 	unsigned int chancnt;
 	struct list_head channels;
 	struct list_head global_node;
+	dma_cap_mask_t  cap_mask;
+	int max_xor;
 
 	struct kref refcount;
 	struct completion done;
 
 	int dev_id;
+	struct device *dev;
 
 	int (*device_alloc_chan_resources)(struct dma_chan *chan);
 	void (*device_free_chan_resources)(struct dma_chan *chan);
-	dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan,
-			void *dest, void *src, size_t len);
-	dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan,
-			struct page *page, unsigned int offset, void *kdata,
-			size_t len);
-	dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan,
-			struct page *dest_pg, unsigned int dest_off,
-			struct page *src_pg, unsigned int src_off, size_t len);
-	enum dma_status (*device_memcpy_complete)(struct dma_chan *chan,
+
+	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
+		struct dma_chan *chan, size_t len, int int_en);
+	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
+		struct dma_chan *chan, unsigned int src_cnt, size_t len,
+		int int_en);
+	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+		struct dma_chan *chan, unsigned int src_cnt, size_t len,
+		u32 *result, int int_en);
+	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
+		struct dma_chan *chan, int value, size_t len, int int_en);
+	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
+		struct dma_chan *chan);
+
+	void (*device_dependency_added)(struct dma_chan *chan);
+	enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
 			dma_cookie_t cookie, dma_cookie_t *last,
 			dma_cookie_t *used);
-	void (*device_memcpy_issue_pending)(struct dma_chan *chan);
+	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
 /* --- public DMA engine API --- */
@@ -207,96 +289,72 @@ struct dma_client *dma_async_client_register(dma_event_callback event_callback);
 void dma_async_client_unregister(struct dma_client *client);
 void dma_async_client_chan_request(struct dma_client *client,
 		unsigned int number);
+dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
+	void *dest, void *src, size_t len);
+dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
+	struct page *page, unsigned int offset, void *kdata, size_t len);
+dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
+	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
+	unsigned int src_off, size_t len);
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+	struct dma_chan *chan);
 
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
-	void *dest, void *src, size_t len)
-{
-	int cpu = get_cpu();
-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-	put_cpu();
 
-	return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len);
+static inline void
+async_tx_ack(struct dma_async_tx_descriptor *tx)
+{
+	tx->ack = 1;
 }
 
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
-	struct page *page, unsigned int offset, void *kdata, size_t len)
+#define first_dma_cap(mask) __first_dma_cap(&(mask))
+static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
 {
-	int cpu = get_cpu();
-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-	put_cpu();
+	return min_t(int, DMA_TX_TYPE_END,
+		find_first_bit(srcp->bits, DMA_TX_TYPE_END));
+}
 
-	return chan->device->device_memcpy_buf_to_pg(chan, page, offset,
-	                                             kdata, len);
+#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
+static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
+{
+	return min_t(int, DMA_TX_TYPE_END,
+		find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
 }
 
-/**
- * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
- * @chan: DMA channel to offload copy to
- * @dest_pg: destination page
- * @dest_off: offset in page to copy to
- * @src_pg: source page
- * @src_off: offset in page to copy from
- * @len: length
- *
- * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
- * address according to the DMA mapping API rules for streaming mappings.
- * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
- * (kernel memory or locked user space pages).
- */
-static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
-	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
-	unsigned int src_off, size_t len)
+#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
+static inline void
+__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
 {
-	int cpu = get_cpu();
-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-	put_cpu();
+	set_bit(tx_type, dstp->bits);
+}
 
-	return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off,
-	                                            src_pg, src_off, len);
+#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
+static inline int
+__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
+{
+	return test_bit(tx_type, srcp->bits);
 }
 
+#define for_each_dma_cap_mask(cap, mask) \
+	for ((cap) = first_dma_cap(mask);	\
+		(cap) < DMA_TX_TYPE_END;	\
+		(cap) = next_dma_cap((cap), (mask)))
+
 /**
- * dma_async_memcpy_issue_pending - flush pending copies to HW
+ * dma_async_issue_pending - flush pending transactions to HW
  * @chan: target DMA channel
  *
  * This allows drivers to push copies to HW in batches,
  * reducing MMIO writes where possible.
  */
-static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
+static inline void dma_async_issue_pending(struct dma_chan *chan)
 {
-	return chan->device->device_memcpy_issue_pending(chan);
+	return chan->device->device_issue_pending(chan);
 }
 
+#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
+
 /**
- * dma_async_memcpy_complete - poll for transaction completion
+ * dma_async_is_tx_complete - poll for transaction completion
  * @chan: DMA channel
  * @cookie: transaction identifier to check status of
  * @last: returns last completed cookie, can be NULL
@@ -306,12 +364,15 @@ static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
  * internal state and can be used with dma_async_is_complete() to check
  * the status of multiple cookies without re-checking hardware state.
  */
-static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan,
+static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
 	dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
 {
-	return chan->device->device_memcpy_complete(chan, cookie, last, used);
+	return chan->device->device_is_tx_complete(chan, cookie, last, used);
 }
 
+#define dma_async_memcpy_complete(chan, cookie, last, used)\
+	dma_async_is_tx_complete(chan, cookie, last, used)
+
 /**
  * dma_async_is_complete - test a cookie against chan state
  * @cookie: transaction identifier to test status of
@@ -334,6 +395,7 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
 	return DMA_IN_PROGRESS;
 }
 
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 
 /* --- DMA device --- */
 
@@ -362,5 +424,4 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
 	struct dma_pinned_list *pinned_list, struct page *page,
 	unsigned int offset, size_t len);
 
-#endif /* CONFIG_DMA_ENGINE */
 #endif /* DMAENGINE_H */
-- 
cgit v1.2.3


From d379b01e9087a582d58f4b678208a4f8d8376fe7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 9 Jul 2007 11:56:42 -0700
Subject: dmaengine: make clients responsible for managing channels

The current implementation assumes that a channel will only be used by one
client at a time.  In order to enable channel sharing the dmaengine core is
changed to a model where clients subscribe to channel-available-events.
Instead of tracking how many channels a client wants and how many it has
received the core just broadcasts the available channels and lets the
clients optionally take a reference.  The core learns about the clients'
needs at dma_event_callback time.

In support of multiple operation types, clients can specify a capability
mask to only be notified of channels that satisfy a certain set of
capabilities.

Changelog:
* removed DMA_TX_ARRAY_INIT, no longer needed
* dma_client_chan_free -> dma_chan_release: switch to global reference
  counting only at device unregistration time, before it was also happening
  at client unregistration time
* clients now return dma_state_client to dmaengine (ack, dup, nak)
* checkpatch.pl fixes
* fixup merge with git-ioat

Cc: Chris Leech <christopher.leech@intel.com>
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dmaengine.h | 58 ++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3de1cf71031a..a3b6035b6c86 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -29,19 +29,31 @@
 #include <linux/dma-mapping.h>
 
 /**
- * enum dma_event - resource PNP/power managment events
+ * enum dma_state - resource PNP/power management state
  * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
  * @DMA_RESOURCE_RESUME: DMA device returning to full power
- * @DMA_RESOURCE_ADDED: DMA device added to the system
+ * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
  * @DMA_RESOURCE_REMOVED: DMA device removed from the system
  */
-enum dma_event {
+enum dma_state {
 	DMA_RESOURCE_SUSPEND,
 	DMA_RESOURCE_RESUME,
-	DMA_RESOURCE_ADDED,
+	DMA_RESOURCE_AVAILABLE,
 	DMA_RESOURCE_REMOVED,
 };
 
+/**
+ * enum dma_state_client - state of the channel in the client
+ * @DMA_ACK: client would like to use, or was using this channel
+ * @DMA_DUP: client has already seen this channel, or is not using this channel
+ * @DMA_NAK: client does not want to see any more channels
+ */
+enum dma_state_client {
+	DMA_ACK,
+	DMA_DUP,
+	DMA_NAK,
+};
+
 /**
  * typedef dma_cookie_t - an opaque DMA cookie
  *
@@ -104,7 +116,6 @@ struct dma_chan_percpu {
 
 /**
  * struct dma_chan - devices supply DMA channels, clients use them
- * @client: ptr to the client user of this chan, will be %NULL when unused
  * @device: ptr to the dma device who supplies this channel, always !%NULL
  * @cookie: last cookie value returned to client
  * @chan_id: channel ID for sysfs
@@ -112,12 +123,10 @@ struct dma_chan_percpu {
  * @refcount: kref, used in "bigref" slow-mode
  * @slow_ref: indicates that the DMA channel is free
  * @rcu: the DMA channel's RCU head
- * @client_node: used to add this to the client chan list
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
  */
 struct dma_chan {
-	struct dma_client *client;
 	struct dma_device *device;
 	dma_cookie_t cookie;
 
@@ -129,11 +138,11 @@ struct dma_chan {
 	int slow_ref;
 	struct rcu_head rcu;
 
-	struct list_head client_node;
 	struct list_head device_node;
 	struct dma_chan_percpu *local;
 };
 
+
 void dma_chan_cleanup(struct kref *kref);
 
 static inline void dma_chan_get(struct dma_chan *chan)
@@ -158,26 +167,31 @@ static inline void dma_chan_put(struct dma_chan *chan)
 
 /*
  * typedef dma_event_callback - function pointer to a DMA event callback
+ * For each channel added to the system this routine is called for each client.
+ * If the client would like to use the channel it returns '1' to signal (ack)
+ * the dmaengine core to take out a reference on the channel and its
+ * corresponding device.  A client must not 'ack' an available channel more
+ * than once.  When a channel is removed all clients are notified.  If a client
+ * is using the channel it must 'ack' the removal.  A client must not 'ack' a
+ * removed channel more than once.
+ * @client - 'this' pointer for the client context
+ * @chan - channel to be acted upon
+ * @state - available or removed
  */
-typedef void (*dma_event_callback) (struct dma_client *client,
-		struct dma_chan *chan, enum dma_event event);
+struct dma_client;
+typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
+		struct dma_chan *chan, enum dma_state state);
 
 /**
  * struct dma_client - info on the entity making use of DMA services
  * @event_callback: func ptr to call when something happens
- * @chan_count: number of chans allocated
- * @chans_desired: number of chans requested. Can be +/- chan_count
- * @lock: protects access to the channels list
- * @channels: the list of DMA channels allocated
+ * @cap_mask: only return channels that satisfy the requested capabilities
+ *  a value of zero corresponds to any capability
  * @global_node: list_head for global dma_client_list
  */
 struct dma_client {
 	dma_event_callback	event_callback;
-	unsigned int		chan_count;
-	unsigned int		chans_desired;
-
-	spinlock_t		lock;
-	struct list_head	channels;
+	dma_cap_mask_t		cap_mask;
 	struct list_head	global_node;
 };
 
@@ -285,10 +299,9 @@ struct dma_device {
 
 /* --- public DMA engine API --- */
 
-struct dma_client *dma_async_client_register(dma_event_callback event_callback);
+void dma_async_client_register(struct dma_client *client);
 void dma_async_client_unregister(struct dma_client *client);
-void dma_async_client_chan_request(struct dma_client *client,
-		unsigned int number);
+void dma_async_client_chan_request(struct dma_client *client);
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 	void *dest, void *src, size_t len);
 dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
@@ -299,7 +312,6 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 	struct dma_chan *chan);
 
-
 static inline void
 async_tx_ack(struct dma_async_tx_descriptor *tx)
 {
-- 
cgit v1.2.3


From 685784aaf3cd0e3ff5e36c7ecf6f441cdbf57f73 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 9 Jul 2007 11:56:42 -0700
Subject: xor: make 'xor_blocks' a library routine for use with async_tx

The async_tx api tries to use a dma engine for an operation, but will fall
back to an optimized software routine otherwise.  Xor support is
implemented using the raid5 xor routines.  For organizational purposes this
routine is moved to a common area.

The following fixes are also made:
* rename xor_block => xor_blocks, suggested by Adrian Bunk
* ensure that xor.o initializes before md.o in the built-in case
* checkpatch.pl fixes
* mark calibrate_xor_blocks __init, Adrian Bunk

Cc: Adrian Bunk <bunk@stusta.de>
Cc: NeilBrown <neilb@suse.de>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/raid/xor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index f0d67cbdea40..7d6c20b654fa 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -5,7 +5,7 @@
 
 #define MAX_XOR_BLOCKS 5
 
-extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
+extern void xor_blocks(unsigned int count, unsigned int bytes, void **ptr);
 
 struct xor_block_template {
         struct xor_block_template *next;
-- 
cgit v1.2.3


From 9bc89cd82d6f88fb0ca39b30445c329a430fd66b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 2 Jan 2007 11:10:44 -0700
Subject: async_tx: add the async_tx api

The async_tx api provides methods for describing a chain of asynchronous
bulk memory transfers/transforms with support for inter-transactional
dependencies.  It is implemented as a dmaengine client that smooths over
the details of different hardware offload engine implementations.  Code
that is written to the api can optimize for asynchronous operation and the
api will fit the chain of operations to the available offload resources.

	I imagine that any piece of ADMA hardware would register with the
	'async_*' subsystem, and a call to async_X would be routed as
	appropriate, or be run in-line. - Neil Brown

async_tx exploits the capabilities of struct dma_async_tx_descriptor to
provide an api of the following general format:

struct dma_async_tx_descriptor *
async_<operation>(..., struct dma_async_tx_descriptor *depend_tx,
			dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, <operation>);
	struct dma_device *device = chan ? chan->device : NULL;
	int int_en = cb_fn ? 1 : 0;
	struct dma_async_tx_descriptor *tx = device ?
		device->device_prep_dma_<operation>(chan, len, int_en) : NULL;

	if (tx) { /* run <operation> asynchronously */
		...
		tx->tx_set_dest(addr, tx, index);
		...
		tx->tx_set_src(addr, tx, index);
		...
		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else { /* run <operation> synchronously */
		...
		<operation>
		...
		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}

async_tx_find_channel() returns a capable channel from its pool.  The
channel pool is organized as a per-cpu array of channel pointers.  The
async_tx_rebalance() routine is tasked with managing these arrays.  In the
uniprocessor case async_tx_rebalance() tries to spread responsibility
evenly over channels of similar capabilities.  For example if there are two
copy+xor channels, one will handle copy operations and the other will
handle xor.  In the SMP case async_tx_rebalance() attempts to spread the
operations evenly over the cpus, e.g. cpu0 gets copy channel0 and xor
channel0 while cpu1 gets copy channel 1 and xor channel 1.  When a
dependency is specified async_tx_find_channel defaults to keeping the
operation on the same channel.  A xor->copy->xor chain will stay on one
channel if it supports both operation types, otherwise the transaction will
transition between a copy and a xor resource.

Currently the raid5 implementation in the MD raid456 driver has been
converted to the async_tx api.  A driver for the offload engines on the
Intel Xscale series of I/O processors, iop-adma, is provided in a later
commit.  With the iop-adma driver and async_tx, raid456 is able to offload
copy, xor, and xor-zero-sum operations to hardware engines.

On iop342 tiobench showed higher throughput for sequential writes (20 - 30%
improvement) and sequential reads to a degraded array (40 - 55%
improvement).  For the other cases performance was roughly equal, +/- a few
percentage points.  On a x86-smp platform the performance of the async_tx
implementation (in synchronous mode) was also +/- a few percentage points
of the original implementation.  According to 'top' on iop342 CPU
utilization drops from ~50% to ~15% during a 'resync' while the speed
according to /proc/mdstat doubles from ~25 MB/s to ~50 MB/s.

The tiobench command line used for testing was: tiobench --size 2048
--block 4096 --block 131072 --dir /mnt/raid --numruns 5
* iop342 had 1GB of memory available

Details:
* if CONFIG_DMA_ENGINE=n the asynchronous path is compiled away by making
  async_tx_find_channel a static inline routine that always returns NULL
* when a callback is specified for a given transaction an interrupt will
  fire at operation completion time and the callback will occur in a
  tasklet.  If the channel does not support interrupts then a live
  polling wait will be performed
* the api is written as a dmaengine client that requests all available
  channels
* In support of dependencies the api implicitly schedules channel-switch
  interrupts.  The interrupt triggers the cleanup tasklet which causes
  pending operations to be scheduled on the next channel
* Xor engines treat an xor destination address differently than a software
  xor routine.  To the software routine the destination address is an implied
  source, whereas engines treat it as a write-only destination.  This patch
  modifies the xor_blocks routine to take an explicit destination address
  to mirror the hardware.

Changelog:
* fixed a leftover debug print
* don't allow callbacks in async_interrupt_cond
* fixed xor_block changes
* fixed usage of ASYNC_TX_XOR_DROP_DEST
* drop dma mapping methods, suggested by Chris Leech
* printk warning fixups from Andrew Morton
* don't use inline in C files, Adrian Bunk
* select the API when MD is enabled
* BUG_ON xor source counts <= 1
* implicitly handle hardware concerns like channel switching and
  interrupts, Neil Brown
* remove the per operation type list, and distribute operation capabilities
  evenly amongst the available channels
* simplify async_tx_find_channel to optimize the fast path
* introduce the channel_table_initialized flag to prevent early calls to
  the api
* reorganize the code to mimic crypto
* include mm.h as not all archs include it in dma-mapping.h
* make the Kconfig options non-user visible, Adrian Bunk
* move async_tx under crypto since it is meant as 'core' functionality, and
  the two may share algorithms in the future
* move large inline functions into c files
* checkpatch.pl fixes
* gpl v2 only correction

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---
 include/linux/async_tx.h | 156 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/raid/xor.h |   5 +-
 2 files changed, 159 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/async_tx.h

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
new file mode 100644
index 000000000000..ff1255079fa1
--- /dev/null
+++ b/include/linux/async_tx.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef _ASYNC_TX_H_
+#define _ASYNC_TX_H_
+#include <linux/dmaengine.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+/**
+ * dma_chan_ref - object used to manage dma channels received from the
+ *   dmaengine core.
+ * @chan - the channel being tracked
+ * @node - node for the channel to be placed on async_tx_master_list
+ * @rcu - for list_del_rcu
+ * @count - number of times this channel is listed in the pool
+ *	(for channels with multiple capabiities)
+ */
+struct dma_chan_ref {
+	struct dma_chan *chan;
+	struct list_head node;
+	struct rcu_head rcu;
+	atomic_t count;
+};
+
+/**
+ * async_tx_flags - modifiers for the async_* calls
+ * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the
+ * the destination address is not a source.  The asynchronous case handles this
+ * implicitly, the synchronous case needs to zero the destination block.
+ * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is
+ * also one of the source addresses.  In the synchronous case the destination
+ * address is an implied source, whereas the asynchronous case it must be listed
+ * as a source.  The destination address must be the first address in the source
+ * array.
+ * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
+ * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
+ * dependency chain
+ * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
+ * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
+ * take an atomic mapping (KM_USER0) on the source page(s)
+ * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
+ * take an atomic mapping (KM_USER0) on the dest page(s)
+ */
+enum async_tx_flags {
+	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
+	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
+	ASYNC_TX_ASSUME_COHERENT = (1 << 2),
+	ASYNC_TX_ACK		 = (1 << 3),
+	ASYNC_TX_DEP_ACK	 = (1 << 4),
+	ASYNC_TX_KMAP_SRC	 = (1 << 5),
+	ASYNC_TX_KMAP_DST	 = (1 << 6),
+};
+
+#ifdef CONFIG_DMA_ENGINE
+void async_tx_issue_pending_all(void);
+enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
+void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
+struct dma_chan *
+async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+	enum dma_transaction_type tx_type);
+#else
+static inline void async_tx_issue_pending_all(void)
+{
+	do { } while (0);
+}
+
+static inline enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+	return DMA_SUCCESS;
+}
+
+static inline void
+async_tx_run_dependencies(struct dma_async_tx_descriptor *tx,
+	struct dma_chan *host_chan)
+{
+	do { } while (0);
+}
+
+static inline struct dma_chan *
+async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+	enum dma_transaction_type tx_type)
+{
+	return NULL;
+}
+#endif
+
+/**
+ * async_tx_sync_epilog - actions to take if an operation is run synchronously
+ * @flags: async_tx flags
+ * @depend_tx: transaction depends on depend_tx
+ * @cb_fn: function to call when the transaction completes
+ * @cb_fn_param: parameter to pass to the callback routine
+ */
+static inline void
+async_tx_sync_epilog(unsigned long flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param)
+{
+	if (cb_fn)
+		cb_fn(cb_fn_param);
+
+	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+		async_tx_ack(depend_tx);
+}
+
+void
+async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+struct dma_async_tx_descriptor *
+async_xor(struct page *dest, struct page **src_list, unsigned int offset,
+	int src_cnt, size_t len, enum async_tx_flags flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+struct dma_async_tx_descriptor *
+async_xor_zero_sum(struct page *dest, struct page **src_list,
+	unsigned int offset, int src_cnt, size_t len,
+	u32 *result, enum async_tx_flags flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+struct dma_async_tx_descriptor *
+async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
+	unsigned int src_offset, size_t len, enum async_tx_flags flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+struct dma_async_tx_descriptor *
+async_memset(struct page *dest, int val, unsigned int offset,
+	size_t len, enum async_tx_flags flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+struct dma_async_tx_descriptor *
+async_trigger_callback(enum async_tx_flags flags,
+	struct dma_async_tx_descriptor *depend_tx,
+	dma_async_tx_callback cb_fn, void *cb_fn_param);
+#endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 7d6c20b654fa..3e120587eada 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -3,9 +3,10 @@
 
 #include <linux/raid/md.h>
 
-#define MAX_XOR_BLOCKS 5
+#define MAX_XOR_BLOCKS 4
 
-extern void xor_blocks(unsigned int count, unsigned int bytes, void **ptr);
+extern void xor_blocks(unsigned int count, unsigned int bytes,
+	void *dest, void **srcs);
 
 struct xor_block_template {
         struct xor_block_template *next;
-- 
cgit v1.2.3


From a445685647e825c713175d180ffc8dd54d90589b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 9 Jul 2007 11:56:43 -0700
Subject: raid5: refactor handle_stripe5 and handle_stripe6 (v3)

handle_stripe5 and handle_stripe6 have very deep logic paths handling the
various states of a stripe_head.  By introducing the 'stripe_head_state'
and 'r6_state' objects, large portions of the logic can be moved to
sub-routines.

'struct stripe_head_state' consumes all of the automatic variables that previously
stood alone in handle_stripe5,6.  'struct r6_state' contains the handle_stripe6
specific variables like p_failed and q_failed.

One of the nice side effects of the 'stripe_head_state' change is that it
allows for further reductions in code duplication between raid5 and raid6.
The following new routines are shared between raid5 and raid6:

	handle_completed_write_requests
	handle_requests_to_failed_array
	handle_stripe_expansion

Changes:
* v2: fixed 'conf->raid_disk-1' for the raid6 'handle_stripe_expansion' path
* v3: removed the unused 'dirty' field from struct stripe_head_state
* v3: coalesced open coded bi_end_io routines into return_io()

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid5.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index d8286db60b96..b99d354f6128 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -145,6 +145,22 @@ struct stripe_head {
 		unsigned long	flags;
 	} dev[1]; /* allocated with extra space depending of RAID geometry */
 };
+
+/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
+ *     for handle_stripe.  It is only valid under spin_lock(sh->lock);
+ */
+struct stripe_head_state {
+	int syncing, expanding, expanded;
+	int locked, uptodate, to_read, to_write, failed, written;
+	int non_overwrite;
+	int failed_num;
+};
+
+/* r6_state - extra state data only relevant to r6 */
+struct r6_state {
+	int p_failed, q_failed, qd_idx, failed_num[2];
+};
+
 /* Flags */
 #define	R5_UPTODATE	0	/* page contains current data */
 #define	R5_LOCKED	1	/* IO has been submitted on "req" */
-- 
cgit v1.2.3


From 91c00924846a0034020451c280c76baa4299f9dc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 2 Jan 2007 13:52:30 -0700
Subject: md: raid5_run_ops - run stripe operations outside sh->lock

When the raid acceleration work was proposed, Neil laid out the following
attack plan:

1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api

The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.

To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests.  In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing.  The following flags outline the
requests that handle_stripe can make of raid5_run_ops:

STRIPE_OP_BIOFILL
 - copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
 - generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
 - subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
 - copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
 - recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
 - verify that the parity is correct
STRIPE_OP_IO
 - submit i/o to the member disks (note this was already performed outside
   the stripe lock, but it made sense to add it as an operation type)

The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
   operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
   sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
   new operations that were previously blocked

Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.

Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
  ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
  handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
  was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid5.h | 81 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index b99d354f6128..6fb9d94e6f2e 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -116,13 +116,46 @@
  *  attach a request to an active stripe (add_stripe_bh())
  *     lockdev attach-buffer unlockdev
  *  handle a stripe (handle_stripe())
- *     lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io
+ *     lockstripe clrSTRIPE_HANDLE ...
+ *		(lockdev check-buffers unlockdev) ..
+ *		change-state ..
+ *		record io/ops needed unlockstripe schedule io/ops
  *  release an active stripe (release_stripe())
  *     lockdev if (!--cnt) { if  STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
  *
  * The refcount counts each thread that have activated the stripe,
  * plus raid5d if it is handling it, plus one for each active request
- * on a cached buffer.
+ * on a cached buffer, and plus one if the stripe is undergoing stripe
+ * operations.
+ *
+ * Stripe operations are performed outside the stripe lock,
+ * the stripe operations are:
+ * -copying data between the stripe cache and user application buffers
+ * -computing blocks to save a disk access, or to recover a missing block
+ * -updating the parity on a write operation (reconstruct write and
+ *  read-modify-write)
+ * -checking parity correctness
+ * -running i/o to disk
+ * These operations are carried out by raid5_run_ops which uses the async_tx
+ * api to (optionally) offload operations to dedicated hardware engines.
+ * When requesting an operation handle_stripe sets the pending bit for the
+ * operation and increments the count.  raid5_run_ops is then run whenever
+ * the count is non-zero.
+ * There are some critical dependencies between the operations that prevent some
+ * from being requested while another is in flight.
+ * 1/ Parity check operations destroy the in cache version of the parity block,
+ *    so we prevent parity dependent operations like writes and compute_blocks
+ *    from starting while a check is in progress.  Some dma engines can perform
+ *    the check without damaging the parity block, in these cases the parity
+ *    block is re-marked up to date (assuming the check was successful) and is
+ *    not re-read from disk.
+ * 2/ When a write operation is requested we immediately lock the affected
+ *    blocks, and mark them as not up to date.  This causes new read requests
+ *    to be held off, as well as parity checks and compute block operations.
+ * 3/ Once a compute block operation has been requested handle_stripe treats
+ *    that block as if it is up to date.  raid5_run_ops guaruntees that any
+ *    operation that is dependent on the compute block result is initiated after
+ *    the compute block completes.
  */
 
 struct stripe_head {
@@ -136,11 +169,26 @@ struct stripe_head {
 	spinlock_t		lock;
 	int			bm_seq;	/* sequence number for bitmap flushes */
 	int			disks;			/* disks in stripe */
+	/* stripe_operations
+	 * @pending - pending ops flags (set for request->issue->complete)
+	 * @ack - submitted ops flags (set for issue->complete)
+	 * @complete - completed ops flags (set for complete)
+	 * @target - STRIPE_OP_COMPUTE_BLK target
+	 * @count - raid5_runs_ops is set to run when this is non-zero
+	 */
+	struct stripe_operations {
+		unsigned long	   pending;
+		unsigned long	   ack;
+		unsigned long	   complete;
+		int		   target;
+		int		   count;
+		u32		   zero_sum_result;
+	} ops;
 	struct r5dev {
 		struct bio	req;
 		struct bio_vec	vec;
 		struct page	*page;
-		struct bio	*toread, *towrite, *written;
+		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;			/* sector of this page */
 		unsigned long	flags;
 	} dev[1]; /* allocated with extra space depending of RAID geometry */
@@ -174,6 +222,15 @@ struct r6_state {
 #define	R5_ReWrite	9	/* have tried to over-write the readerror */
 
 #define	R5_Expanded	10	/* This block now has post-expand data */
+#define	R5_Wantcompute	11 /* compute_block in progress treat as
+				    * uptodate
+				    */
+#define	R5_Wantfill	12 /* dev->toread contains a bio that needs
+				    * filling
+				    */
+#define	R5_Wantprexor	13 /* distinguish blocks ready for rmw from
+				    * other "towrites"
+				    */
 /*
  * Write method
  */
@@ -195,6 +252,24 @@ struct r6_state {
 #define	STRIPE_EXPANDING	9
 #define	STRIPE_EXPAND_SOURCE	10
 #define	STRIPE_EXPAND_READY	11
+/*
+ * Operations flags (in issue order)
+ */
+#define STRIPE_OP_BIOFILL	0
+#define STRIPE_OP_COMPUTE_BLK	1
+#define STRIPE_OP_PREXOR	2
+#define STRIPE_OP_BIODRAIN	3
+#define STRIPE_OP_POSTXOR	4
+#define STRIPE_OP_CHECK	5
+#define STRIPE_OP_IO		6
+
+/* modifiers to the base operations
+ * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
+ * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
+ */
+#define STRIPE_OP_MOD_REPAIR_PD 7
+#define STRIPE_OP_MOD_DMA_CHECK 8
+
 /*
  * Plugging:
  *
-- 
cgit v1.2.3


From f38e12199a94ca458e4f03c5a2c984fb80adadc5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 2 Jan 2007 13:52:30 -0700
Subject: md: handle_stripe5 - add request/completion logic for async compute
 ops

handle_stripe will compute a block when a backing disk has failed, or when
it determines it can save a disk read by computing the block from all the
other up-to-date blocks.

Previously a block would be computed under the lock and subsequent logic in
handle_stripe could use the newly up-to-date block.  With the raid5_run_ops
implementation the compute operation is carried out at a later time outside
the lock.  To preserve the old functionality we take advantage of the
dependency chain feature of async_tx to flag the block as R5_Wantcompute
and then let other parts of handle_stripe operate on the block as if it
were up-to-date.  raid5_run_ops guarantees that the block will be ready
before it is used in another operation.

However, this only works in cases where the compute and the dependent
operation are scheduled at the same time.  If a previous call to
handle_stripe sets the R5_Wantcompute flag there is no facility to pass the
async_tx dependency chain across successive calls to raid5_run_ops.  The
req_compute variable protects against this case.

Changelog:
* remove the req_compute BUG_ON

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid5.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 6fb9d94e6f2e..2293015de1d5 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -200,7 +200,7 @@ struct stripe_head {
 struct stripe_head_state {
 	int syncing, expanding, expanded;
 	int locked, uptodate, to_read, to_write, failed, written;
-	int non_overwrite;
+	int compute, req_compute, non_overwrite;
 	int failed_num;
 };
 
-- 
cgit v1.2.3


From b5e98d65d34a1c11a2135ea8a9b2619dbc7216c8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 2 Jan 2007 13:52:31 -0700
Subject: md: handle_stripe5 - add request/completion logic for async read ops

When a read bio is attached to the stripe and the corresponding block is
marked R5_UPTODATE, then a read (biofill) operation is scheduled to copy
the data from the stripe cache to the bio buffer.  handle_stripe flags the
blocks to be operated on with the R5_Wantfill flag.  If new read requests
arrive while raid5_run_ops is running they will not be handled until
handle_stripe is scheduled to run again.

Changelog:
* cleanup to_read and to_fill accounting
* do not fail reads that have reached the cache

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid5.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 2293015de1d5..93678f57ccbe 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -200,7 +200,7 @@ struct stripe_head {
 struct stripe_head_state {
 	int syncing, expanding, expanded;
 	int locked, uptodate, to_read, to_write, failed, written;
-	int compute, req_compute, non_overwrite;
+	int to_fill, compute, req_compute, non_overwrite;
 	int failed_num;
 };
 
-- 
cgit v1.2.3


From 3039f0735a280b54c7364fbfe6a9287f7f0b510a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2007 08:06:19 -0700
Subject: ioatdma: add the unisys "i/oat" pci vendor/device id

Cc: John Magolan <john.magolan@unisys.com>
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/pci_ids.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5b1c9994f89a..0275f6917c8e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -475,6 +475,9 @@
 #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361
 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL	0x252
 
+#define PCI_VENDOR_ID_UNISYS		0x1018
+#define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C
+
 #define PCI_VENDOR_ID_COMPEX2		0x101a /* pci.ids says "AT&T GIS (NCR)" */
 #define PCI_DEVICE_ID_COMPEX2_100VG	0x0005
 
-- 
cgit v1.2.3


From f787a50306680c187cf2896a8017937c1bf6dc7e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 11 Jul 2007 21:21:47 +0200
Subject: [PATCH] sched: small topology.h cleanup

trivial cleanup: LOCAL_DISTANCE and REMOTE_DISTANCE are only used in
topology.h and inside an #ifndef section - limit their existence to
that #ifndef.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index da6c39b2d051..d0890a7e5bab 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -50,10 +50,10 @@
 	for_each_online_node(node)						\
 		if (nr_cpus_node(node))
 
-#ifndef node_distance
 /* Conform to ACPI 2.0 SLIT distance definitions */
 #define LOCAL_DISTANCE		10
 #define REMOTE_DISTANCE		20
+#ifndef node_distance
 #define node_distance(from,to)	((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
 #endif
 #ifndef RECLAIM_DISTANCE
-- 
cgit v1.2.3


From 24023451c8df726692e2f52288a20870d13b501f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 14 Jul 2007 18:51:31 -0700
Subject: [NET]: Add net_device change_rx_mode callback

Currently the set_multicast_list (and set_rx_mode) callbacks are
responsible for configuring the device according to the IFF_PROMISC,
IFF_MULTICAST and IFF_ALLMULTI flags and the mc_list (and uc_list in
case of set_rx_mode).

These callbacks can be invoked from BH context without the rtnl_mutex
by dev_mc_add/dev_mc_delete, which makes reading the device flags and
promiscuous/allmulti count racy. For real hardware drivers that just
commit all changes to the hardware this is not a real problem since
the stack guarantees to call them for every change, so at least the
final call will not race and commit the correct configuration to the
hardware.

For software devices that want to synchronize promiscuous and multicast
state to an underlying device however this can cause corruption of the
underlying device's flags or promisc/allmulti counts.

When the software device is concurrently put in promiscuous or allmulti
mode while set_multicast_list is invoked from bottom half context, the
device might synchronize the change to the underlying device without
holding the rtnl_mutex, which races with concurrent changes to the
underlying device.

Add a dev->change_rx_flags hook that is invoked when any of the flags
that affect rx filtering change (under the rtnl_mutex), which allows
drivers to perform synchronization immediately and only synchronize
the address lists in set_multicast_list/set_rx_mode.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 79cc3dab4be7..f193aba30384 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -516,6 +516,9 @@ struct net_device
 						void *saddr,
 						unsigned len);
 	int			(*rebuild_header)(struct sk_buff *skb);
+#define HAVE_CHANGE_RX_FLAGS
+	void			(*change_rx_flags)(struct net_device *dev,
+						   int flags);
 #define HAVE_SET_RX_MODE
 	void			(*set_rx_mode)(struct net_device *dev);
 #define HAVE_MULTICAST			 
-- 
cgit v1.2.3


From a0a400d79e3dd7843e7e81baa3ef2957bdc292d0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 14 Jul 2007 18:52:02 -0700
Subject: [NET]: dev_mcast: add multicast list synchronization helpers

The method drivers currently use to synchronize multicast lists is not
very pretty:

- walk the multicast list
- search each entry on a copy of the previous list
- if new add to lower device
- walk the copy of the previous list
- search each entry on the current list
- if removed delete from lower device
- copy entire list

This patch adds a new field to struct dev_addr_list to store the
synchronization state and adds two helper functions for synchronization
and cleanup.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f193aba30384..e5af458ab04b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -190,6 +190,7 @@ struct dev_addr_list
 	struct dev_addr_list	*next;
 	u8			da_addr[MAX_ADDR_LEN];
 	u8			da_addrlen;
+	u8			da_synced;
 	int			da_users;
 	int			da_gusers;
 };
@@ -1103,6 +1104,8 @@ extern int		dev_unicast_delete(struct net_device *dev, void *addr, int alen);
 extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
+extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
+extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
 extern void		dev_mc_discard(struct net_device *dev);
 extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
-- 
cgit v1.2.3


From 6c78dcbd47a68a7d25d2bee7a6c74b9136cb5fde Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 14 Jul 2007 18:52:56 -0700
Subject: [VLAN]: Fix promiscous/allmulti synchronization races

The set_multicast_list function may be called without holding the rtnl
mutex, resulting in races when changing the underlying device's promiscuous
and allmulti state. Use the change_rx_flags hook, which is always invoked
under the rtnl.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 61a57dc2ac99..7f71df4c952f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -132,8 +132,6 @@ struct vlan_dev_info {
                                            * made, in order to feed the right changes down
                                            * to the real hardware...
                                            */
-	int old_allmulti;               /* similar to above. */
-	int old_promiscuity;            /* similar to above. */
 	struct net_device *real_dev;    /* the underlying device/interface */
 	unsigned char real_dev_addr[ETH_ALEN];
 	struct proc_dir_entry *dent;    /* Holds the proc data */
-- 
cgit v1.2.3


From 56addd6eeeb4e11f5a0af7093ca078e0f29140e0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 14 Jul 2007 18:53:28 -0700
Subject: [VLAN]: Use multicast list synchronization helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 7f71df4c952f..f8443fdb124a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -127,11 +127,6 @@ struct vlan_dev_info {
                                         *   like DHCP that use packet-filtering and don't understand
                                         *   802.1Q
                                         */
-	struct dev_mc_list *old_mc_list;  /* old multi-cast list for the VLAN interface..
-                                           * we save this so we can tell what changes were
-                                           * made, in order to feed the right changes down
-                                           * to the real hardware...
-                                           */
 	struct net_device *real_dev;    /* the underlying device/interface */
 	unsigned char real_dev_addr[ETH_ALEN];
 	struct proc_dir_entry *dent;    /* Holds the proc data */
-- 
cgit v1.2.3


From b863ceb7ddcea8c55fcf1d7b2ac591d50aa7ed53 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 14 Jul 2007 18:55:06 -0700
Subject: [NET]: Add macvlan driver

Add macvlan driver, which allows creating virtual ethernet devices
based on MAC address.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 9 +++++++++
 include/linux/netdevice.h  | 2 ++
 2 files changed, 11 insertions(+)
 create mode 100644 include/linux/if_macvlan.h

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
new file mode 100644
index 000000000000..0d9d7ea2c1cc
--- /dev/null
+++ b/include/linux/if_macvlan.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_IF_MACVLAN_H
+#define _LINUX_IF_MACVLAN_H
+
+#ifdef __KERNEL__
+
+extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e5af458ab04b..322b5eae57dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -564,6 +564,8 @@ struct net_device
 
 	/* bridge stuff */
 	struct net_bridge_port	*br_port;
+	/* macvlan */
+	struct macvlan_port	*macvlan_port;
 
 	/* class/net/name entry */
 	struct device		dev;
-- 
cgit v1.2.3


From 6460d948f3ebf7d5040328a60a0ab7221f69945b Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 14 Jul 2007 19:07:52 -0700
Subject: [NET]: Add ethtool support for NETIF_F_IPV6_CSUM devices.

Add ethtool utility function to set or clear IPV6_CSUM feature flag.
Modify tg3.c and bnx2.c to use this function when doing ethtool -K
to change tx checksum.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f2d248f8cc92..3a632244f31b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -265,6 +265,7 @@ u32 ethtool_op_get_link(struct net_device *dev);
 u32 ethtool_op_get_tx_csum(struct net_device *dev);
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
 int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
+int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data);
 u32 ethtool_op_get_sg(struct net_device *dev);
 int ethtool_op_set_sg(struct net_device *dev, u32 data);
 u32 ethtool_op_get_tso(struct net_device *dev);
-- 
cgit v1.2.3


From 370786f9cfd430cb424f00ce4110e75bb1b95a19 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Sat, 14 Jul 2007 20:47:26 -0700
Subject: [NETFILTER]: x_tables: add connlimit match

ipt_connlimit has been sitting in POM-NG for a long time.
Here is a new shiny xt_connlimit with:

 * xtables'ified
 * will request the layer3 module
   (previously it hotdropped every packet when it was not loaded)
 * fixed: there was a deadlock in case of an OOM condition
 * support for any layer4 protocol (e.g. UDP/SCTP)
 * using jhash, as suggested by Eric Dumazet
 * ipv6 support

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_connlimit.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 include/linux/netfilter/xt_connlimit.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h
new file mode 100644
index 000000000000..90ae8b474cb8
--- /dev/null
+++ b/include/linux/netfilter/xt_connlimit.h
@@ -0,0 +1,17 @@
+#ifndef _XT_CONNLIMIT_H
+#define _XT_CONNLIMIT_H
+
+struct xt_connlimit_data;
+
+struct xt_connlimit_info {
+	union {
+		u_int32_t v4_mask;
+		u_int32_t v6_mask[4];
+	};
+	unsigned int limit, inverse;
+
+	/* this needs to be at the end */
+	struct xt_connlimit_data *data __attribute__((aligned(8)));
+};
+
+#endif /* _XT_CONNLIMIT_H */
-- 
cgit v1.2.3


From 4381ca3c23b07ba5b567f72325003020ddca0341 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sun, 15 Jul 2007 21:00:11 +0100
Subject: fix return type of skb_checksum_complete()

It returns __sum16, not unsigned int

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9391e4a4c344..ce256438e619 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1639,7 +1639,7 @@ static inline int skb_csum_unnecessary(const struct sk_buff *skb)
  *	if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
  *	hardware has already verified the correctness of the checksum.
  */
-static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
+static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 {
 	return skb_csum_unnecessary(skb) ?
 	       0 : __skb_checksum_complete(skb);
-- 
cgit v1.2.3


From 22e03f3b58dfcca30f0c8de185022132459638d1 Mon Sep 17 00:00:00 2001
From: Raphael Assenat <raph@8d.com>
Date: Tue, 27 Feb 2007 19:49:53 +0000
Subject: leds: Add generic GPIO LED driver

This patch adds support for GPIO connected leds via the new GPIO framework.

Information about leds (gpio, polarity, name, default trigger) is passed
to the driver via platform_data.

Signed-off-by: Raphael Assenat <raph@8d.com>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 include/linux/leds.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 88afceffb7cb..059abfe219dc 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -110,4 +110,18 @@ extern void ledtrig_ide_activity(void);
 #define ledtrig_ide_activity() do {} while(0)
 #endif
 
+/* For the leds-gpio driver */
+struct gpio_led {
+	const char *name;
+	char *default_trigger;
+	unsigned 	gpio;
+	u8 		active_low;
+};
+
+struct gpio_led_platform_data {
+	int 		num_leds;
+	struct gpio_led *leds;
+};
+
+
 #endif		/* __LINUX_LEDS_H_INCLUDED */
-- 
cgit v1.2.3


From f8a7c6fe14f556ca8eeddce258cb21392d0c3a2f Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sun, 8 Jul 2007 23:19:31 +0100
Subject: leds: Convert from struct class_device to struct device

Convert the LEDs class from struct class_device to struct device
since class_device is scheduled for removal.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/leds.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 059abfe219dc..dc1178f6184b 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -16,7 +16,6 @@
 #include <linux/spinlock.h>
 
 struct device;
-struct class_device;
 /*
  * LED Core
  */
@@ -38,7 +37,7 @@ struct led_classdev {
 	void		(*brightness_set)(struct led_classdev *led_cdev,
 					  enum led_brightness brightness);
 
-	struct class_device	*class_dev;
+	struct device		*dev;
 	struct list_head	 node;			/* LED Device list */
 	char			*default_trigger;	/* Trigger to use */
 
-- 
cgit v1.2.3


From 655bfd7aebb12481ab9275284d9500bee5ba3e70 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Mon, 9 Jul 2007 12:17:24 +0100
Subject: backlight: Convert from struct class_device to struct device

Convert the backlight and LCD classes from struct class_device
to struct device since class_device is scheduled for removal.

One nasty API break is that the backlight power attribute has had to be
renamed to bl_power and the LCD power attribute has had to be renamed
to lcd_power since the original names clash with the core. I can't see
a way around this.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/backlight.h | 11 ++++++++---
 include/linux/lcd.h       | 14 ++++++++++----
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 1023ba0d6e55..c897c7b03858 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -69,8 +69,8 @@ struct backlight_device {
 
 	/* The framebuffer notifier block */
 	struct notifier_block fb_notif;
-	/* The class device structure */
-	struct class_device class_dev;
+
+	struct device dev;
 };
 
 static inline void backlight_update_status(struct backlight_device *bd)
@@ -85,6 +85,11 @@ extern struct backlight_device *backlight_device_register(const char *name,
 	struct device *dev, void *devdata, struct backlight_ops *ops);
 extern void backlight_device_unregister(struct backlight_device *bd);
 
-#define to_backlight_device(obj) container_of(obj, struct backlight_device, class_dev)
+#define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
+
+static inline void * bl_get_data(struct backlight_device *bl_dev)
+{
+	return dev_get_drvdata(&bl_dev->dev);
+}
 
 #endif
diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 598793c0745b..1d379787f2e7 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -62,8 +62,8 @@ struct lcd_device {
 	struct mutex update_lock;
 	/* The framebuffer notifier block */
 	struct notifier_block fb_notif;
-	/* The class device structure */
-	struct class_device class_dev;
+
+	struct device dev;
 };
 
 static inline void lcd_set_power(struct lcd_device *ld, int power)
@@ -75,9 +75,15 @@ static inline void lcd_set_power(struct lcd_device *ld, int power)
 }
 
 extern struct lcd_device *lcd_device_register(const char *name,
-	void *devdata, struct lcd_ops *ops);
+	struct device *parent, void *devdata, struct lcd_ops *ops);
 extern void lcd_device_unregister(struct lcd_device *ld);
 
-#define to_lcd_device(obj) container_of(obj, struct lcd_device, class_dev)
+#define to_lcd_device(obj) container_of(obj, struct lcd_device, dev)
+
+static inline void * lcd_get_data(struct lcd_device *ld_dev)
+{
+	return dev_get_drvdata(&ld_dev->dev);
+}
+
 
 #endif
-- 
cgit v1.2.3


From 3d6392cfbd7dc11f23058e3493683afab4ac13a3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 9 Jul 2007 12:38:05 +0200
Subject: bsg: support for full generic block layer SG v3

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 12 ++++++++++++
 include/linux/bsg.h    | 21 +++++++++++++++++++++
 include/linux/genhd.h  |  2 ++
 3 files changed, 35 insertions(+)
 create mode 100644 include/linux/bsg.h

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fae138bd2207..53002d40efa2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -41,6 +41,8 @@ struct elevator_queue;
 typedef struct elevator_queue elevator_t;
 struct request_pm_state;
 struct blk_trace;
+struct request;
+struct sg_io_hdr;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -607,6 +609,11 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_BOUNCE_ANY		((u64)blk_max_pfn << PAGE_SHIFT)
 #define BLK_BOUNCE_ISA		(ISA_DMA_THRESHOLD)
 
+/*
+ * default timeout for SG_IO if none specified
+ */
+#define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
+
 #ifdef CONFIG_MMU
 extern int init_emergency_isa_pool(void);
 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio);
@@ -680,6 +687,11 @@ extern int blk_execute_rq(request_queue_t *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
+extern int blk_fill_sghdr_rq(request_queue_t *, struct request *,
+		      struct sg_io_hdr *, int);
+extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *);
+extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *,
+			  struct bio *);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
new file mode 100644
index 000000000000..dc0d7282c4cb
--- /dev/null
+++ b/include/linux/bsg.h
@@ -0,0 +1,21 @@
+#ifndef BSG_H
+#define BSG_H
+
+#if defined(CONFIG_BLK_DEV_BSG)
+struct bsg_class_device {
+	struct class_device *class_dev;
+	struct device *dev;
+	int minor;
+	struct gendisk *disk;
+	struct list_head list;
+};
+
+extern int bsg_register_disk(struct gendisk *);
+extern void bsg_unregister_disk(struct gendisk *);
+#else
+struct bsg_class_device { };
+#define bsg_register_disk(disk)		(0)
+#define bsg_unregister_disk(disk)	do { } while (0)
+#endif
+
+#endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 9756fc102a83..8c43d7032612 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -67,6 +67,7 @@ struct partition {
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/workqueue.h>
+#include <linux/bsg.h>
 
 struct partition {
 	unsigned char boot_ind;		/* 0x80 - active */
@@ -91,6 +92,7 @@ struct hd_struct {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
 #endif
+	struct bsg_class_device bsg_dev;
 };
 
 #define GENHD_FL_REMOVABLE			1
-- 
cgit v1.2.3


From 337ad41deae1b56e56731246322a93251df86e79 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <tomof@acm.org>
Date: Wed, 20 Dec 2006 11:18:54 +0100
Subject: block: export blk_verify_command for SG v4

blk_fill_sghdr_rq doesn't work for SG v4 so verify_command needed to
be exported.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 53002d40efa2..f6bc0d03ffad 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -692,6 +692,7 @@ extern int blk_fill_sghdr_rq(request_queue_t *, struct request *,
 extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *);
 extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *,
 			  struct bio *);
+extern int blk_verify_command(unsigned char *, int);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
-- 
cgit v1.2.3


From 45977d0e87ac988d04fccfb89221727aaf8d78a4 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <tomof@acm.org>
Date: Wed, 20 Dec 2006 11:19:32 +0100
Subject: bsg: add sg_io_v4 structure

This patch adds sg_io_v4 structure that Doug proposed last month.

There's one major change from the RFC. I dropped iovec, which needs
compat stuff. The bsg code simply calls blk_rq_map_user against
dout_xferp/din_xferp. So if possible, the page frames are directly
mapped. If not possible, the block layer allocates new page frames and
does memory copies.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bsg.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index dc0d7282c4cb..0d212cc06abf 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -1,6 +1,47 @@
 #ifndef BSG_H
 #define BSG_H
 
+struct sg_io_v4 {
+	int32_t guard;		/* [i] 'Q' to differentiate from v3 */
+	uint32_t protocol;	/* [i] 0 -> SCSI , .... */
+	uint32_t subprotocol;	/* [i] 0 -> SCSI command, 1 -> SCSI task
+				   management function, .... */
+
+	uint32_t request_len;	/* [i] in bytes */
+	uint64_t request;	/* [i], [*i] {SCSI: cdb} */
+	uint32_t request_attr;	/* [i] {SCSI: task attribute} */
+	uint32_t request_tag;	/* [i] {SCSI: task tag (only if flagged)} */
+	uint32_t request_priority;	/* [i] {SCSI: task priority} */
+	uint32_t max_response_len;	/* [i] in bytes */
+	uint64_t response;	/* [i], [*o] {SCSI: (auto)sense data} */
+
+	/* "din_" for data in (from device); "dout_" for data out (to device) */
+	uint32_t dout_xfer_len;	/* [i] bytes to be transferred to device */
+	uint32_t din_xfer_len;	/* [i] bytes to be transferred from device */
+	uint64_t dout_xferp;	/* [i], [*i] */
+	uint64_t din_xferp;	/* [i], [*o] */
+
+	uint32_t timeout;	/* [i] units: millisecond */
+	uint32_t flags;		/* [i] bit mask */
+	uint64_t usr_ptr;	/* [i->o] unused internally */
+	uint32_t spare_in;	/* [i] */
+
+	uint32_t driver_status;	/* [o] 0 -> ok */
+	uint32_t transport_status;	/* [o] 0 -> ok */
+	uint32_t device_status;	/* [o] {SCSI: command completion status} */
+	uint32_t retry_delay;	/* [o] {SCSI: status auxiliary information} */
+	uint32_t info;		/* [o] additional information */
+	uint32_t duration;	/* [o] time to complete, in milliseconds */
+	uint32_t response_len;	/* [o] bytes of response actually written */
+	int32_t din_resid;	/* [o] actual_din_xfer_len - din_xfer_len */
+	uint32_t generated_tag;	/* [o] {SCSI: task tag that transport chose} */
+	uint32_t spare_out;	/* [o] */
+
+	uint32_t padding;
+};
+
+#ifdef __KERNEL__
+
 #if defined(CONFIG_BLK_DEV_BSG)
 struct bsg_class_device {
 	struct class_device *class_dev;
@@ -18,4 +59,6 @@ struct bsg_class_device { };
 #define bsg_unregister_disk(disk)	do { } while (0)
 #endif
 
+#endif /* __KERNEL__ */
+
 #endif
-- 
cgit v1.2.3


From 1594a3f0eb526c73bc3915e8da13f2abf0ea1acd Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 20 Dec 2006 11:23:35 +0100
Subject: bsg: use u32 etc instead of uint32_t

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bsg.h | 58 ++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 0d212cc06abf..f968726cfadc 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -2,42 +2,42 @@
 #define BSG_H
 
 struct sg_io_v4 {
-	int32_t guard;		/* [i] 'Q' to differentiate from v3 */
-	uint32_t protocol;	/* [i] 0 -> SCSI , .... */
-	uint32_t subprotocol;	/* [i] 0 -> SCSI command, 1 -> SCSI task
+	s32 guard;		/* [i] 'Q' to differentiate from v3 */
+	u32 protocol;		/* [i] 0 -> SCSI , .... */
+	u32 subprotocol;	/* [i] 0 -> SCSI command, 1 -> SCSI task
 				   management function, .... */
 
-	uint32_t request_len;	/* [i] in bytes */
-	uint64_t request;	/* [i], [*i] {SCSI: cdb} */
-	uint32_t request_attr;	/* [i] {SCSI: task attribute} */
-	uint32_t request_tag;	/* [i] {SCSI: task tag (only if flagged)} */
-	uint32_t request_priority;	/* [i] {SCSI: task priority} */
-	uint32_t max_response_len;	/* [i] in bytes */
-	uint64_t response;	/* [i], [*o] {SCSI: (auto)sense data} */
+	u32 request_len;	/* [i] in bytes */
+	u64 request;		/* [i], [*i] {SCSI: cdb} */
+	u32 request_attr;	/* [i] {SCSI: task attribute} */
+	u32 request_tag;	/* [i] {SCSI: task tag (only if flagged)} */
+	u32 request_priority;	/* [i] {SCSI: task priority} */
+	u32 max_response_len;	/* [i] in bytes */
+	u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
 
 	/* "din_" for data in (from device); "dout_" for data out (to device) */
-	uint32_t dout_xfer_len;	/* [i] bytes to be transferred to device */
-	uint32_t din_xfer_len;	/* [i] bytes to be transferred from device */
-	uint64_t dout_xferp;	/* [i], [*i] */
-	uint64_t din_xferp;	/* [i], [*o] */
+	u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
+	u32 din_xfer_len;	/* [i] bytes to be transferred from device */
+	u64 dout_xferp;		/* [i], [*i] */
+	u64 din_xferp;		/* [i], [*o] */
 
-	uint32_t timeout;	/* [i] units: millisecond */
-	uint32_t flags;		/* [i] bit mask */
-	uint64_t usr_ptr;	/* [i->o] unused internally */
-	uint32_t spare_in;	/* [i] */
+	u32 timeout;		/* [i] units: millisecond */
+	u32 flags;		/* [i] bit mask */
+	u64 usr_ptr;		/* [i->o] unused internally */
+	u32 spare_in;		/* [i] */
 
-	uint32_t driver_status;	/* [o] 0 -> ok */
-	uint32_t transport_status;	/* [o] 0 -> ok */
-	uint32_t device_status;	/* [o] {SCSI: command completion status} */
-	uint32_t retry_delay;	/* [o] {SCSI: status auxiliary information} */
-	uint32_t info;		/* [o] additional information */
-	uint32_t duration;	/* [o] time to complete, in milliseconds */
-	uint32_t response_len;	/* [o] bytes of response actually written */
-	int32_t din_resid;	/* [o] actual_din_xfer_len - din_xfer_len */
-	uint32_t generated_tag;	/* [o] {SCSI: task tag that transport chose} */
-	uint32_t spare_out;	/* [o] */
+	u32 driver_status;	/* [o] 0 -> ok */
+	u32 transport_status;	/* [o] 0 -> ok */
+	u32 device_status;	/* [o] {SCSI: command completion status} */
+	u32 retry_delay;	/* [o] {SCSI: status auxiliary information} */
+	u32 info;		/* [o] additional information */
+	u32 duration;		/* [o] time to complete, in milliseconds */
+	u32 response_len;	/* [o] bytes of response actually written */
+	s32 din_resid;		/* [o] actual_din_xfer_len - din_xfer_len */
+	u32 generated_tag;	/* [o] {SCSI: task tag that transport chose} */
+	u32 spare_out;		/* [o] */
 
-	uint32_t padding;
+	u32 padding;
 };
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 3862153b673516b2efa0447b9b3778f47ac8f8c8 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <tomof@acm.org>
Date: Fri, 22 Dec 2006 09:43:51 +0100
Subject: Replace s32, u32 and u64 with __s32, __u32 and __u64 in bsg.h for
 userspace

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bsg.h | 58 ++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index f968726cfadc..2154a6dfbd53 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -2,42 +2,42 @@
 #define BSG_H
 
 struct sg_io_v4 {
-	s32 guard;		/* [i] 'Q' to differentiate from v3 */
-	u32 protocol;		/* [i] 0 -> SCSI , .... */
-	u32 subprotocol;	/* [i] 0 -> SCSI command, 1 -> SCSI task
+	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
+	__u32 protocol;		/* [i] 0 -> SCSI , .... */
+	__u32 subprotocol;	/* [i] 0 -> SCSI command, 1 -> SCSI task
 				   management function, .... */
 
-	u32 request_len;	/* [i] in bytes */
-	u64 request;		/* [i], [*i] {SCSI: cdb} */
-	u32 request_attr;	/* [i] {SCSI: task attribute} */
-	u32 request_tag;	/* [i] {SCSI: task tag (only if flagged)} */
-	u32 request_priority;	/* [i] {SCSI: task priority} */
-	u32 max_response_len;	/* [i] in bytes */
-	u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
+	__u32 request_len;	/* [i] in bytes */
+	__u64 request;		/* [i], [*i] {SCSI: cdb} */
+	__u32 request_attr;	/* [i] {SCSI: task attribute} */
+	__u32 request_tag;	/* [i] {SCSI: task tag (only if flagged)} */
+	__u32 request_priority;	/* [i] {SCSI: task priority} */
+	__u32 max_response_len;	/* [i] in bytes */
+	__u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
 
 	/* "din_" for data in (from device); "dout_" for data out (to device) */
-	u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
-	u32 din_xfer_len;	/* [i] bytes to be transferred from device */
-	u64 dout_xferp;		/* [i], [*i] */
-	u64 din_xferp;		/* [i], [*o] */
+	__u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
+	__u32 din_xfer_len;	/* [i] bytes to be transferred from device */
+	__u64 dout_xferp;	/* [i], [*i] */
+	__u64 din_xferp;	/* [i], [*o] */
 
-	u32 timeout;		/* [i] units: millisecond */
-	u32 flags;		/* [i] bit mask */
-	u64 usr_ptr;		/* [i->o] unused internally */
-	u32 spare_in;		/* [i] */
+	__u32 timeout;		/* [i] units: millisecond */
+	__u32 flags;		/* [i] bit mask */
+	__u64 usr_ptr;		/* [i->o] unused internally */
+	__u32 spare_in;		/* [i] */
 
-	u32 driver_status;	/* [o] 0 -> ok */
-	u32 transport_status;	/* [o] 0 -> ok */
-	u32 device_status;	/* [o] {SCSI: command completion status} */
-	u32 retry_delay;	/* [o] {SCSI: status auxiliary information} */
-	u32 info;		/* [o] additional information */
-	u32 duration;		/* [o] time to complete, in milliseconds */
-	u32 response_len;	/* [o] bytes of response actually written */
-	s32 din_resid;		/* [o] actual_din_xfer_len - din_xfer_len */
-	u32 generated_tag;	/* [o] {SCSI: task tag that transport chose} */
-	u32 spare_out;		/* [o] */
+	__u32 driver_status;	/* [o] 0 -> ok */
+	__u32 transport_status;	/* [o] 0 -> ok */
+	__u32 device_status;	/* [o] {SCSI: command completion status} */
+	__u32 retry_delay;	/* [o] {SCSI: status auxiliary information} */
+	__u32 info;		/* [o] additional information */
+	__u32 duration;		/* [o] time to complete, in milliseconds */
+	__u32 response_len;	/* [o] bytes of response actually written */
+	__s32 din_resid;	/* [o] actual_din_xfer_len - din_xfer_len */
+	__u32 generated_tag;	/* [o] {SCSI: task tag that transport chose} */
+	__u32 spare_out;	/* [o] */
 
-	u32 padding;
+	__u32 padding;
 };
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 45e79a3acdcf54113b3d7b23e9e64e6541dbfeb5 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 9 Jul 2007 12:39:20 +0200
Subject: bsg: add a request_queue argument to scsi_cmd_ioctl()

bsg uses scsi_cmd_ioctl() for some SCSI/sg ioctl
commands. scsi_cmd_ioctl() gets a request queue from a gendisk
argument. This prevents bsg being bound to SCSI devices that don't
have a gendisk (like OSD). This adds a request_queue argument to
scsi_cmd_ioctl(). The SCSI/sg ioctl commands don't use a gendisk so
it's safe for any SCSI devices to use scsi_cmd_ioctl().

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f6bc0d03ffad..2746632c2267 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -644,7 +644,8 @@ extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
 extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
-extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
+extern int scsi_cmd_ioctl(struct file *, struct request_queue *,
+			  struct gendisk *, unsigned int, void __user *);
 extern int sg_scsi_ioctl(struct file *, struct request_queue *,
 		struct gendisk *, struct scsi_ioctl_command __user *);
 
-- 
cgit v1.2.3


From d351af01b9307566135cb0f355ca65d0952c10b5 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 9 Jul 2007 12:40:35 +0200
Subject: bsg: bind bsg to request_queue instead of gendisk

This patch binds bsg devices to request_queue instead of gendisk. Any
objects (like transport entities) can define their own request_handler
and create their own bsg device.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h |  5 +++++
 include/linux/bsg.h    | 10 +++++-----
 include/linux/genhd.h  |  2 --
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2746632c2267..24b474e05a44 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -14,6 +14,7 @@
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/stringify.h>
+#include <linux/bsg.h>
 
 #include <asm/scatterlist.h>
 
@@ -470,6 +471,10 @@ struct request_queue
 	unsigned int		bi_size;
 
 	struct mutex		sysfs_lock;
+
+#if defined(CONFIG_BLK_DEV_BSG)
+	struct bsg_class_device bsg_dev;
+#endif
 };
 
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 2154a6dfbd53..0475a6d3ff6a 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -47,16 +47,16 @@ struct bsg_class_device {
 	struct class_device *class_dev;
 	struct device *dev;
 	int minor;
-	struct gendisk *disk;
 	struct list_head list;
+	struct request_queue *queue;
 };
 
-extern int bsg_register_disk(struct gendisk *);
-extern void bsg_unregister_disk(struct gendisk *);
+extern int bsg_register_queue(struct request_queue *, char *);
+extern void bsg_unregister_queue(struct request_queue *);
 #else
 struct bsg_class_device { };
-#define bsg_register_disk(disk)		(0)
-#define bsg_unregister_disk(disk)	do { } while (0)
+#define bsg_register_queue(disk, name)		(0)
+#define bsg_unregister_queue(disk)	do { } while (0)
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8c43d7032612..9756fc102a83 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -67,7 +67,6 @@ struct partition {
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/workqueue.h>
-#include <linux/bsg.h>
 
 struct partition {
 	unsigned char boot_ind;		/* 0x80 - active */
@@ -92,7 +91,6 @@ struct hd_struct {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
 #endif
-	struct bsg_class_device bsg_dev;
 };
 
 #define GENHD_FL_REMOVABLE			1
-- 
cgit v1.2.3


From 4cf0723ac89b5f2189da2ad07ef875de26b83c77 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 30 Mar 2007 11:19:39 +0200
Subject: bsg: minor bug fixes

This fixes the following minor issues:

- add EXPORT_SYMBOL_GPL for bsg_register_queue and
bsg_unregister_queue.

- shut up gcc warnings

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <axboe@nelson.home.kernel.dk>
---
 include/linux/bsg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 0475a6d3ff6a..0dd01f90ba5e 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -51,7 +51,7 @@ struct bsg_class_device {
 	struct request_queue *queue;
 };
 
-extern int bsg_register_queue(struct request_queue *, char *);
+extern int bsg_register_queue(struct request_queue *, const char *);
 extern void bsg_unregister_queue(struct request_queue *);
 #else
 struct bsg_class_device { };
-- 
cgit v1.2.3


From abae1fde63fcdd2a3abaa0d7930938d8326f83d2 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 16 Jul 2007 08:52:14 +0200
Subject: add a struct request pointer to the request structure

This adds a struct request pointer to the request structure for the
second data phase (bidi for now). A request queue supporting bidi
requests sets QUEUE_FLAG_BIDI. This prevents sending bidi requests to
a non-bidi queue.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24b474e05a44..b32564a1e105 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -317,6 +317,9 @@ struct request {
 	 */
 	rq_end_io_fn *end_io;
 	void *end_io_data;
+
+	/* for bidi */
+	struct request *next_rq;
 };
 
 /*
@@ -486,6 +489,7 @@ struct request_queue
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 
 enum {
 	/*
@@ -550,6 +554,7 @@ enum {
 #define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
+#define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-- 
cgit v1.2.3


From 15d10b611fa94b52f004a08a1d4cf7b39de3cba3 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 16 Jul 2007 08:52:16 +0200
Subject: bsg: add SCSI transport-level request support

This enables bsg to handle SCSI transport-level requests like the SAS
management protocol (SMP).

- add BSG_SUB_PROTOCOL_{SCSI_CMD, SCSI_TMF, SCSI_TRANSPORT} definitions.
- SCSI transport-level requests skip blk_verify_command().

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bsg.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 0dd01f90ba5e..bd998ca6cb2e 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -1,6 +1,12 @@
 #ifndef BSG_H
 #define BSG_H
 
+#define BSG_PROTOCOL_SCSI		0
+
+#define BSG_SUB_PROTOCOL_SCSI_CMD	0
+#define BSG_SUB_PROTOCOL_SCSI_TMF	1
+#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT	2
+
 struct sg_io_v4 {
 	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
 	__u32 protocol;		/* [i] 0 -> SCSI , .... */
-- 
cgit v1.2.3


From d6d281684913dabb878e2f53219eed5df2cd867b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 28 Jun 2007 08:38:16 -0400
Subject: KVM: Remove kvmfs in favor of the anonymous inodes source

kvm uses a pseudo filesystem, kvmfs, to generate inodes, a job that the
new anonymous inodes source does much better.

Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/magic.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/magic.h b/include/linux/magic.h
index 9d713c03e3da..36cc20dfd142 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -13,7 +13,6 @@
 #define HPFS_SUPER_MAGIC	0xf995e849
 #define ISOFS_SUPER_MAGIC	0x9660
 #define JFFS2_SUPER_MAGIC	0x72b6
-#define KVMFS_SUPER_MAGIC	0x19700426
 #define ANON_INODE_FS_MAGIC	0x09041934
 
 #define MINIX_SUPER_MAGIC	0x137F		/* original minix fs */
-- 
cgit v1.2.3


From db912f963909b3cbc3a059b7528f6a1a1eb6ffae Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 24 May 2007 12:23:10 +0300
Subject: HOTPLUG: Add CPU_DYING notifier

KVM wants a notification when a cpu is about to die, so it can disable
hardware extensions, but at a time when user processes cannot be scheduled
on the cpu, so it doesn't try to use virtualization extensions after they
have been disabled.

This adds a CPU_DYING notification.  The notification is called in atomic
context on the doomed cpu.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/notifier.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9431101bf876..576f2bb34cc8 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_LOCK_ACQUIRE	0x0008 /* Acquire all hotcpu locks */
 #define CPU_LOCK_RELEASE	0x0009 /* Release all hotcpu locks */
+#define CPU_DYING		0x000A /* CPU (unsigned)v not running any task,
+				        * not handling interrupts, soon dead */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 #define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 #define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
-- 
cgit v1.2.3


From a52b1752c077cb919b71167c54968a0b91673281 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 9 Jul 2007 17:11:49 +0300
Subject: SMP: Allow smp_call_function_single() to current cpu

This removes the requirement for callers to get_cpu() to check in simple
cases.  This patch is for !CONFIG_SMP.

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/smp.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 96ac21f8dd73..8039daced688 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
  */
 
 #include <linux/errno.h>
+#include <asm/system.h>
 
 extern void cpu_idle(void);
 
@@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { }
 static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
 					   void *info, int retry, int wait)
 {
-	return -EBUSY;
+	WARN_ON(cpuid != 0);
+	local_irq_disable();
+	func(info);
+	local_irq_enable();
+	return 0;
 }
 
 #endif /* !SMP */
-- 
cgit v1.2.3


From f2a11b158a24301e9158e9c873fa88e5eb775486 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitingupta910@gmail.com>
Date: Sun, 15 Jul 2007 23:37:21 -0700
Subject: LZO1X: fix lzo1x_worst_compress

This is a correction for a macro which gives worst case compressed data
size by LZO1X.

This patch was provided by the LZO author (Markus Oberhumer).

Signed-off-by: Nitin Gupta <nitingupta910@gmail.com>
Cc: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Cc: "Richard Purdie" <rpurdie@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lzo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lzo.h b/include/linux/lzo.h
index 582d8b711a13..d793497ec1ca 100644
--- a/include/linux/lzo.h
+++ b/include/linux/lzo.h
@@ -17,7 +17,7 @@
 #define LZO1X_MEM_COMPRESS	(16384 * sizeof(unsigned char *))
 #define LZO1X_1_MEM_COMPRESS	LZO1X_MEM_COMPRESS
 
-#define lzo1x_worst_compress(x) (x + (x / 64) + 16 + 3)
+#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
 
 /* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */
 int lzo1x_1_compress(const unsigned char *src, size_t src_len,
-- 
cgit v1.2.3


From 96d7fa421e6424ad9ef6d1d039375dc2edb63fe8 Mon Sep 17 00:00:00 2001
From: Kristian Hoegsberg <krh@redhat.com>
Date: Sun, 15 Jul 2007 23:37:24 -0700
Subject: lib: add idr_for_each()

This patch adds an iterator function for the idr data structure.  Compared
to just iterating through the idr with an integer and idr_find, this
iterator is (almost, but not quite) linear in the number of elements, as
opposed to the number of integers in the range covered by the idr.  This
makes a difference for sparse idrs, but more importantly, it's a nicer way
to iterate through the elements.

The drm subsystem is moving to idr for tracking contexts and drawables, and
with this change, we can use the idr exclusively for tracking these
resources.

[akpm@linux-foundation.org: fix comment]
Signed-off-by: Kristian Hoegsberg <krh@redhat.com>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 915572fa030b..8442c0bffc06 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -78,6 +78,8 @@ void *idr_find(struct idr *idp, int id);
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
 int idr_get_new(struct idr *idp, void *ptr, int *id);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
+int idr_for_each(struct idr *idp,
+		 int (*fn)(int id, void *p, void *data), void *data);
 void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
 void idr_destroy(struct idr *idp);
-- 
cgit v1.2.3


From 23936cc0b5d89619c34c2dab11d8cf3d6f7ca028 Mon Sep 17 00:00:00 2001
From: Kristian Hoegsberg <krh@redhat.com>
Date: Sun, 15 Jul 2007 23:37:24 -0700
Subject: lib: add idr_remove_all

Remove all ids from the given idr tree.  idr_destroy() only frees up
unused, cached idp_layers, but this function will remove all id mappings
and leave all idp_layers unused.

A typical clean-up sequence for objects stored in an idr tree will use
idr_for_each() to free all objects, if necessary, then idr_remove_all() to
remove all ids, and idr_destroy() to free up the cached idr_layers.

Signed-off-by: Kristian Hoegsberg <krh@redhat.com>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 8442c0bffc06..0edda411959c 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -82,6 +82,7 @@ int idr_for_each(struct idr *idp,
 		 int (*fn)(int id, void *p, void *data), void *data);
 void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
+void idr_remove_all(struct idr *idp);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
 
-- 
cgit v1.2.3


From 18a8bd949d6adb311ea816125ff65050df1f3f6e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Sun, 15 Jul 2007 23:37:59 -0700
Subject: serial: convert early_uart to earlycon for 8250

Because SERIAL_PORT_DFNS is removed from include/asm-i386/serial.h and
include/asm-x86_64/serial.h, the serial8250_ports need to be probed late in
the serial initializing stage.  The console_init=>serial8250_console_init=>
register_console=>serial8250_console_setup path will return -ENODEV, and
console ttyS0 can not be enabled at that time.  We need to wait till
uart_add_one_port in drivers/serial/serial_core.c calls register_console to
get console ttyS0, and that is too late.

Make early_uart use early_param, so the uart console can be used earlier.
Make it a boot console with the CON_BOOT flag, so it can use the console
handover feature and switch to the corresponding normal serial console
automatically.

new command line will be:
	console=uart8250,io,0x3f8,9600n8
	console=uart8250,mmio,0xff5e0000,115200n8
or
	earlycon=uart8250,io,0x3f8,9600n8
	earlycon=uart8250,mmio,0xff5e0000,115200n8

it will print in very early stage:
	Early serial console at I/O port 0x3f8 (options '9600n8')
	console [uart0] enabled
later for console it will print:
	console handover: boot [uart0] -> real [ttyS0]

Signed-off-by: <yinghai.lu@sun.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Gerd Hoffmann <kraxel@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/console.h     | 2 ++
 include/linux/serial.h      | 6 ------
 include/linux/serial_8250.h | 4 ++++
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 62ef6e11d0d2..c44d3dfde7a5 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -99,6 +99,7 @@ struct console {
 	struct tty_driver *(*device)(struct console *, int *);
 	void	(*unblank)(void);
 	int	(*setup)(struct console *, char *);
+	int	(*early_setup)(void);
 	short	flags;
 	short	index;
 	int	cflag;
@@ -107,6 +108,7 @@ struct console {
 };
 
 extern int add_preferred_console(char *name, int idx, char *options);
+extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
 extern void register_console(struct console *);
 extern int unregister_console(struct console *);
 extern struct console *console_drivers;
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 33fc8cb8ddfb..deb714314fb1 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -177,11 +177,5 @@ struct serial_icounter_struct {
 #ifdef __KERNEL__
 #include <linux/compiler.h>
 
-/* Allow architectures to override entries in serial8250_ports[] at run time: */
-struct uart_port;	/* forward declaration */
-extern int early_serial_setup(struct uart_port *port);
-extern int early_serial_console_init(char *options);
-extern int serial8250_start_console(struct uart_port *port, char *options);
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SERIAL_H */
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 71310d80c09a..706ee9a4c80c 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -60,4 +60,8 @@ void serial8250_unregister_port(int line);
 void serial8250_suspend_port(int line);
 void serial8250_resume_port(int line);
 
+extern int serial8250_find_port(struct uart_port *p);
+extern int serial8250_find_port_for_earlycon(void);
+extern int setup_early_serial8250_console(char *cmdline);
+
 #endif
-- 
cgit v1.2.3


From f0c0b2b808f232741eadac272bd4bc51f18df0f4 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sun, 15 Jul 2007 23:38:01 -0700
Subject: change zonelist order: zonelist order selection logic

Make zonelist creation policy selectable from sysctl/boot option v6.

This patch makes NUMA's zonelist (of pgdat) order selectable.
Available order are Default(automatic)/ Node-based / Zone-based.

[Default Order]
The kernel selects Node-based or Zone-based order automatically.

[Node-based Order]
This policy treats the locality of memory as the most important parameter.
Zonelist order is created by each zone's locality. This means lower zones
(ex. ZONE_DMA) can be used before higher zone (ex. ZONE_NORMAL) exhaustion.
IOW, ZONE_DMA will be in the middle of the zonelist.
The current 2.6.21 kernel uses this.

Pros.
 * A user can expect local memory as much as possible.
Cons.
 * lower zone will be exhausted before higher zone. This may cause OOM_KILL.

Maybe suitable if ZONE_DMA is relatively big and you never see OOM_KILL
because of ZONE_DMA exhaustion and you need the best locality.

(example)
assume 2 node NUMA. node(0) has ZONE_DMA/ZONE_NORMAL, node(1) has ZONE_NORMAL.

*node(0)'s memory allocation order:

 node(0)'s NORMAL -> node(0)'s DMA -> node(1)'s NORMAL.

*node(1)'s memory allocation order:

 node(1)'s NORMAL -> node(0)'s NORMAL -> node(0)'s DMA.

[Zone-based order]
This policy treats the zone type as the most important parameter.
Zonelist order is created by zone-type order. This means a lower zone
is never used before higher zone exhaustion.
IOW, ZONE_DMA will always be at the tail of the zonelist.

Pros.
 * OOM_KILL (because of lower zone) occurs only if the whole zones are exhausted.
Cons.
 * memory locality may not be best.

(example)
assume 2 node NUMA. node(0) has ZONE_DMA/ZONE_NORMAL, node(1) has ZONE_NORMAL.

*node(0)'s memory allocation order:

 node(0)'s NORMAL -> node(1)'s NORMAL -> node(0)'s DMA.

*node(1)'s memory allocation order:

 node(1)'s NORMAL -> node(0)'s NORMAL -> node(0)'s DMA.

The boot option "numa_zonelist_order=" and proc/sysctl are supported.

command:
%echo N > /proc/sys/vm/numa_zonelist_order

Will rebuild zonelist in Node-based order.

command:
%echo Z > /proc/sys/vm/numa_zonelist_order

Will rebuild zonelist in Zone-based order.

Thanks to Lee Schermerhorn, who gave me much help and code.

[Lee.Schermerhorn@hp.com: add check_highest_zone to build_zonelists_in_zone_order]
[akpm@linux-foundation.org: build fix]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: "jesse.barnes@intel.com" <jesse.barnes@intel.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d09b1345a3a1..04b1636a970b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -566,6 +566,11 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
 
+extern int numa_zonelist_order_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
+extern char numa_zonelist_order[];
+#define NUMA_ZONELIST_ORDER_LEN 16	/* string buffer size */
+
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
 #ifndef numa_node_id
-- 
cgit v1.2.3


From 698827fa9f45019df1609bb686bc51c94e127fbc Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:38:06 -0700
Subject: Remove the deprecated "kmem_cache_t" typedef from slab.h.

Given that there is no remaining usage of the deprecated kmem_cache_t
typedef anywhere in the tree, remove that typedef.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index cebcd3833c76..cd6ab658553f 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -14,8 +14,6 @@
 #include <linux/gfp.h>
 #include <linux/types.h>
 
-typedef struct kmem_cache kmem_cache_t __deprecated;
-
 /*
  * Flags to pass to kmem_cache_create().
  * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set.
-- 
cgit v1.2.3


From fc9a07e7bf1a76e710f5df017abb07628db1781d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 15 Jul 2007 23:38:14 -0700
Subject: invalidate_mapping_pages(): add cond_resched

invalidate_mapping_pages() can sometimes take a long time (millions of pages
to free).  Long enough for the softlockup detector to trigger.

We used to have a cond_resched() in there but I took it out because the
drop_caches code calls invalidate_mapping_pages() under inode_lock.

The patch adds a nasty flag and puts the cond_resched() back.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f0b3bf5983c..51c938a71dec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1610,6 +1610,9 @@ extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 #endif
 extern int invalidate_inodes(struct super_block *);
+unsigned long __invalidate_mapping_pages(struct address_space *mapping,
+					pgoff_t start, pgoff_t end,
+					bool be_atomic);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
-- 
cgit v1.2.3


From 8f0accc8627043702e6ea2bb8b9aa3a171ef8393 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Sun, 15 Jul 2007 23:38:19 -0700
Subject: kill vmalloc_earlyreserve

This symbol got orphaned quite a while ago.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1c1207472bb4..bbd427e8741a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -27,7 +27,6 @@ extern unsigned long max_mapnr;
 
 extern unsigned long num_physpages;
 extern void * high_memory;
-extern unsigned long vmalloc_earlyreserve;
 extern int page_cluster;
 
 #ifdef CONFIG_SYSCTL
-- 
cgit v1.2.3


From 6193a2ff180920f84ee06977165ebf32431fc2d2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 15 Jul 2007 23:38:22 -0700
Subject: slob: initial NUMA support

This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.

We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified.  Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id.  Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.

The current implementation does have some scalability problems, in that all
partial free pages are tracked in the global freelist (with contention due
to the single spinlock).  However, these are things that are being reworked
for SMP scalability first, while things like per-node freelists can easily
be built on top of this sort of functionality once it's been added.

More background can be found in:

	http://marc.info/?l=linux-mm&m=118117916022379&w=2
	http://marc.info/?l=linux-mm&m=118170446306199&w=2
	http://marc.info/?l=linux-mm&m=118187859420048&w=2

and subsequent threads.

Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 126 +++++++++++++++++++++++------------------------
 include/linux/slab_def.h |   4 ++
 include/linux/slob_def.h |  46 +++++++++++++++++
 include/linux/slub_def.h |   6 ++-
 4 files changed, 117 insertions(+), 65 deletions(-)
 create mode 100644 include/linux/slob_def.h

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index cd6ab658553f..27402fea9b79 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -42,7 +42,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *, struct kmem_cache *, unsigned long));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 void kmem_cache_free(struct kmem_cache *, void *);
 unsigned int kmem_cache_size(struct kmem_cache *);
@@ -61,16 +60,6 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL, NULL)
 
-#ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-#else
-static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
-					gfp_t flags, int node)
-{
-	return kmem_cache_alloc(cachep, flags);
-}
-#endif
-
 /*
  * The largest kmalloc size supported by the slab allocators is
  * 32 megabyte (2^25) or the maximum allocatable page order if that is
@@ -89,7 +78,6 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
 /*
  * Common kmalloc functions provided by all allocators
  */
-void *__kmalloc(size_t, gfp_t);
 void *__kzalloc(size_t, gfp_t);
 void * __must_check krealloc(const void *, size_t, gfp_t);
 void kfree(const void *);
@@ -100,40 +88,6 @@ size_t ksize(const void *);
  * @n: number of elements.
  * @size: element size.
  * @flags: the type of memory to allocate.
- */
-static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
-{
-	if (n != 0 && size > ULONG_MAX / n)
-		return NULL;
-	return __kzalloc(n * size, flags);
-}
-
-/*
- * Allocator specific definitions. These are mainly used to establish optimized
- * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by selecting
- * the appropriate general cache at compile time.
- */
-
-#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB)
-#ifdef CONFIG_SLUB
-#include <linux/slub_def.h>
-#else
-#include <linux/slab_def.h>
-#endif /* !CONFIG_SLUB */
-#else
-
-/*
- * Fallback definitions for an allocator not wanting to provide
- * its own optimized kmalloc definitions (like SLOB).
- */
-
-/**
- * kmalloc - allocate memory
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
  *
  * The @flags argument may be one of:
  *
@@ -141,7 +95,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  *
  * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
  *
- * %GFP_ATOMIC - Allocation will not sleep.
+ * %GFP_ATOMIC - Allocation will not sleep.  May use emergency pools.
  *   For example, use this inside interrupt handlers.
  *
  * %GFP_HIGHUSER - Allocate pages from high memory.
@@ -150,18 +104,22 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  *
  * %GFP_NOFS - Do not make any fs calls while trying to get memory.
  *
+ * %GFP_NOWAIT - Allocation will not sleep.
+ *
+ * %GFP_THISNODE - Allocate node-local memory only.
+ *
+ * %GFP_DMA - Allocation suitable for DMA.
+ *   Should only be used for kmalloc() caches. Otherwise, use a
+ *   slab created with SLAB_DMA.
+ *
  * Also it is possible to set different flags by OR'ing
  * in one or more of the following additional @flags:
  *
  * %__GFP_COLD - Request cache-cold pages instead of
  *   trying to return cache-warm pages.
  *
- * %__GFP_DMA - Request memory from the DMA-capable zone.
- *
  * %__GFP_HIGH - This allocation has high priority and may use emergency pools.
  *
- * %__GFP_HIGHMEM - Allocated memory may be from highmem.
- *
  * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
  *   (think twice before using).
  *
@@ -171,24 +129,57 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
  *
  * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ *
+ * There are other flags available as well, but these are not intended
+ * for general use, and so are not documented here. For a full list of
+ * potential flags, always refer to linux/gfp.h.
  */
-static inline void *kmalloc(size_t size, gfp_t flags)
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
-	return __kmalloc(size, flags);
+	if (n != 0 && size > ULONG_MAX / n)
+		return NULL;
+	return __kzalloc(n * size, flags);
 }
 
-/**
- * kzalloc - allocate memory. The memory is set to zero.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
+/*
+ * Allocator specific definitions. These are mainly used to establish optimized
+ * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
+ * selecting the appropriate general cache at compile time.
+ *
+ * Allocators must define at least:
+ *
+ *	kmem_cache_alloc()
+ *	__kmalloc()
+ *	kmalloc()
+ *	kzalloc()
+ *
+ * Those wishing to support NUMA must also define:
+ *
+ *	kmem_cache_alloc_node()
+ *	kmalloc_node()
+ *
+ * See each allocator definition file for additional comments and
+ * implementation notes.
  */
-static inline void *kzalloc(size_t size, gfp_t flags)
-{
-	return __kzalloc(size, flags);
-}
+#ifdef CONFIG_SLUB
+#include <linux/slub_def.h>
+#elif defined(CONFIG_SLOB)
+#include <linux/slob_def.h>
+#else
+#include <linux/slab_def.h>
 #endif
 
-#ifndef CONFIG_NUMA
+#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
+/**
+ * kmalloc_node - allocate memory from a specific node
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kcalloc).
+ * @node: node to allocate from.
+ *
+ * kmalloc() for non-local nodes, used to allocate from a specific node
+ * if available. Equivalent to kmalloc() in the non-NUMA single-node
+ * case.
+ */
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return kmalloc(size, flags);
@@ -198,7 +189,15 @@ static inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __kmalloc(size, flags);
 }
-#endif /* !CONFIG_NUMA */
+
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
+
+static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
+					gfp_t flags, int node)
+{
+	return kmem_cache_alloc(cachep, flags);
+}
+#endif /* !CONFIG_NUMA && !CONFIG_SLOB */
 
 /*
  * kmalloc_track_caller is a special version of kmalloc that records the
@@ -245,4 +244,3 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
 
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SLAB_H */
-
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 8d81a60518e4..365d036c454a 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -25,6 +25,9 @@ struct cache_sizes {
 };
 extern struct cache_sizes malloc_sizes[];
 
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
+void *__kmalloc(size_t size, gfp_t flags);
+
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
@@ -79,6 +82,7 @@ found:
 
 #ifdef CONFIG_NUMA
 extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
new file mode 100644
index 000000000000..a2daf2d418a9
--- /dev/null
+++ b/include/linux/slob_def.h
@@ -0,0 +1,46 @@
+#ifndef __LINUX_SLOB_DEF_H
+#define __LINUX_SLOB_DEF_H
+
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+
+static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	return kmem_cache_alloc_node(cachep, flags, -1);
+}
+
+void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return __kmalloc_node(size, flags, node);
+}
+
+/**
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kcalloc).
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ */
+static inline void *kmalloc(size_t size, gfp_t flags)
+{
+	return __kmalloc_node(size, flags, -1);
+}
+
+static inline void *__kmalloc(size_t size, gfp_t flags)
+{
+	return kmalloc(size, flags);
+}
+
+/**
+ * kzalloc - allocate memory. The memory is set to zero.
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kcalloc).
+ */
+static inline void *kzalloc(size_t size, gfp_t flags)
+{
+	return __kzalloc(size, flags);
+}
+
+#endif /* __LINUX_SLOB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 6207a3d8da71..a582f6771525 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -171,6 +171,9 @@ static inline struct kmem_cache *kmalloc_slab(size_t size)
 #define ZERO_SIZE_PTR ((void *)16)
 
 
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
+void *__kmalloc(size_t size, gfp_t flags);
+
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) {
@@ -198,7 +201,8 @@ static inline void *kzalloc(size_t size, gfp_t flags)
 }
 
 #ifdef CONFIG_NUMA
-extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+void *__kmalloc_node(size_t size, gfp_t flags, int node);
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-- 
cgit v1.2.3


From 0165ab443556bdfad388da6c33d74a71b77d72b2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Sun, 15 Jul 2007 23:38:26 -0700
Subject: split mmap

This is a straightforward split of do_mmap_pgoff() into two functions:

 - do_mmap_pgoff() checks the parameters, and calculates the vma
   flags.  Then it calls

 - mmap_region(), which does the actual mapping

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bbd427e8741a..d4ab4e590e23 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1071,6 +1071,10 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff);
+extern unsigned long mmap_region(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long flags,
+	unsigned int vm_flags, unsigned long pgoff,
+	int accountable);
 
 static inline unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
-- 
cgit v1.2.3


From 786d7e1612f0b0adb6046f19b906609e4fe8b1ba Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@sw.ru>
Date: Sun, 15 Jul 2007 23:39:00 -0700
Subject: Fix rmmod/read/write races in /proc entries

Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
   meanwhile. Or, more generically, system call done on /proc file, method
   supplied by module is called, module disappears meanwhile.

   pde = create_proc_entry()
   if (!pde)
	return -ENOMEM;
   pde->write_proc = ...
				open
				write
				copy_from_user
   pde = create_proc_entry();
   if (!pde) {
	remove_proc_entry();
	return -ENOMEM;
	/* module unloaded */
   }
				*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()

  remove_proc_entry		vfs_read
  proc_kill_inodes		[check ->f_op validness]
				[check ->f_op->read validness]
				[verify_area, security permissions checks]
	->f_op = NULL;
				if (file->f_op->read)
					/* ->f_op dereference, boom */

NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.

NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell at me. Full audit pending.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3469f96bc8b2..28e3664fdf1b 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -7,6 +7,8 @@
 #include <linux/magic.h>
 #include <asm/atomic.h>
 
+struct completion;
+
 /*
  * The proc filesystem constants/structures
  */
@@ -56,6 +58,14 @@ struct proc_dir_entry {
 	gid_t gid;
 	loff_t size;
 	const struct inode_operations *proc_iops;
+	/*
+	 * NULL ->proc_fops means "PDE is going away RSN" or
+	 * "PDE is just created". In either case, e.g. ->read_proc won't be
+	 * called because it's too late or too early, respectively.
+	 *
+	 * If you're allocating ->proc_fops dynamically, save a pointer
+	 * somewhere.
+	 */
 	const struct file_operations *proc_fops;
 	get_info_t *get_info;
 	struct module *owner;
@@ -66,6 +76,9 @@ struct proc_dir_entry {
 	atomic_t count;		/* use count */
 	int deleted;		/* delete flag */
 	void *set;
+	int pde_users;	/* number of callers into module in progress */
+	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	struct completion *pde_unload_completion;
 };
 
 struct kcore_list {
-- 
cgit v1.2.3


From e1f4a88c5a15a86124a95ea712213bb7dab2ad99 Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Sun, 15 Jul 2007 23:39:24 -0700
Subject: introduce write_trylock_irqsave()

Introduce a write_trylock_irqsave() implementation.  Similar in style to
the implementation of spin_trylock_irqsave() in mainline.

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: Sripathi Kodi <sripathik@in.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spinlock.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a946176db638..c376f3b36c89 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -282,6 +282,13 @@ do {						\
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
 
+#define write_trylock_irqsave(lock, flags) \
+({ \
+	local_irq_save(flags); \
+	write_trylock(lock) ? \
+	1 : ({ local_irq_restore(flags); 0; }); \
+})
+
 /*
  * Locks two spinlocks l1 and l2.
  * l1_first indicates if spinlock l1 should be taken first.
-- 
cgit v1.2.3


From ed4aaadb1a7913f509f05d3e67840541a180713f Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Sun, 15 Jul 2007 23:39:39 -0700
Subject: fix jvc cdrom drive lockup

Before calling init_hwif_default, ide_unregister takes the ide_lock spinlock
and disables interrupts.  init_hwif_default calls ide_default_io_base, which
calls pci_get_device; pci_get_subsys then tries to acquire the semaphore
pci_bus_sem and goes to sleep.

Mostly, pci_get_device should be called when irq is turned on.

ide_default_io_base just needs to find out whether the pci_devices list is
empty.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Cc: Greg KH <greg@kroah.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 37a71580ad8a..5e84f2e8d54c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -432,6 +432,8 @@ extern struct bus_type pci_bus_type;
  * code, or pci core code. */
 extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 extern struct list_head pci_devices;	/* list of all devices */
+/* Some device drivers need know if pci is initiated */
+extern int no_pci_devices(void);
 
 void pcibios_fixup_bus(struct pci_bus *);
 int __must_check pcibios_enable_device(struct pci_dev *, int mask);
@@ -724,6 +726,7 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *
 { return NULL; }
 
 #define pci_dev_present(ids)	(0)
+#define no_pci_devices()	(1)
 #define pci_find_present(ids)	(NULL)
 #define pci_dev_put(dev)	do { } while (0)
 
-- 
cgit v1.2.3


From 7c3f1a573237b90ef331267260358a0ec4ac9079 Mon Sep 17 00:00:00 2001
From: Tomas Janousek <tjanouse@redhat.com>
Date: Sun, 15 Jul 2007 23:39:41 -0700
Subject: Introduce boot based time

The commits

  411187fb05cd11676b0979d9fbf3291db69dbce2 (GTOD: persistent clock support)
  c1d370e167d66b10bca3b602d3740405469383de (i386: use GTOD persistent clock
    support)

changed the monotonic time so that it no longer jumps after resume, but it's
not possible to use it for boot time and process start time calculations then.
Also, the uptime no longer increases during suspend.

I add a variable to track the wall_to_monotonic changes, a function to get the
real boot time and a function to get the boot based time from the monotonic
one.

[akpm@linux-foundation.org: remove exports, add comment]
Signed-off-by: Tomas Janousek <tjanouse@redhat.com>
Cc: Tomas Smetana <tsmetana@redhat.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index dda9be685ab6..4bb05a829be9 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -116,6 +116,8 @@ extern int do_setitimer(int which, struct itimerval *value,
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
+extern void getboottime(struct timespec *ts);
+extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_is_continuous(void);
-- 
cgit v1.2.3


From 924b42d5a2dbe508407a0a6290d3751f826bccdd Mon Sep 17 00:00:00 2001
From: Tomas Janousek <tjanouse@redhat.com>
Date: Sun, 15 Jul 2007 23:39:42 -0700
Subject: Use boot based time for process start time and boot time in /proc

Commit 411187fb05cd11676b0979d9fbf3291db69dbce2 caused boot time to move and
process start times to become invalid after suspend.  Using boot based time
for those restores the old behaviour and fixes the issue.

[akpm@linux-foundation.org: little cleanup]
Signed-off-by: Tomas Janousek <tjanouse@redhat.com>
Cc: Tomas Smetana <tsmetana@redhat.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cfb680585ab8..3cffc1204663 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -972,7 +972,8 @@ struct task_struct {
 	unsigned int rt_priority;
 	cputime_t utime, stime;
 	unsigned long nvcsw, nivcsw; /* context switch counts */
-	struct timespec start_time;
+	struct timespec start_time; 		/* monotonic time */
+	struct timespec real_start_time;	/* boot based time */
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-- 
cgit v1.2.3


From 9c1729db3e6d738f872bcb090212af00473bf666 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Sun, 15 Jul 2007 23:39:43 -0700
Subject: Prevent an O_NDELAY writer from blocking when a tty write is blocked
 by the tty atomic writer mutex

Without this a tty write could block if a previous blocking tty write was
in progress on the same tty and blocked by a line discipline or hardware
event.  Originally found and reported by Dave Johnson.

Signed-off-by: Alan Cox <alan@redhat.com>
Acked-by: Dave Johnson <djohnson+linux-kernel@sw.starentnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index bb4576085203..deaba9ec5004 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -338,6 +338,12 @@ extern struct tty_struct *get_current_tty(void);
 
 extern struct mutex tty_mutex;
 
+extern void tty_write_unlock(struct tty_struct *tty);
+extern int tty_write_lock(struct tty_struct *tty, int ndelay);
+#define tty_is_writelocked(tty)  (mutex_is_locked(&tty->atomic_write_lock))
+
+
+
 /* n_tty.c */
 extern struct tty_ldisc tty_ldisc_N_TTY;
 
-- 
cgit v1.2.3


From 9ac162521cd9796f44d263a61090634844c719a6 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Sun, 15 Jul 2007 23:39:49 -0700
Subject: Use mutexes instead of semaphores in I2O driver

The I2O driver uses two semaphores as mutexes.  Use the mutex API instead of
the (binary) semaphores.

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/i2o.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 52f53e2e70c3..333a370a3bdc 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>	/* work_struct */
 #include <linux/mempool.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/semaphore.h>	/* Needed for MUTEX init macros */
@@ -425,7 +426,7 @@ struct i2o_device {
 
 	struct device device;
 
-	struct semaphore lock;	/* device lock */
+	struct mutex lock;	/* device lock */
 };
 
 /*
@@ -544,7 +545,7 @@ struct i2o_controller {
 	struct i2o_dma hrt;	/* HW Resource Table */
 	i2o_lct *lct;		/* Logical Config Table */
 	struct i2o_dma dlct;	/* Temp LCT */
-	struct semaphore lct_lock;	/* Lock for LCT updates */
+	struct mutex lct_lock;	/* Lock for LCT updates */
 	struct i2o_dma status_block;	/* IOP status block */
 
 	struct i2o_io base;	/* controller messaging unit */
-- 
cgit v1.2.3


From 21f3da95daed2d0f0c28cc4ef8b1103fbfb7bded Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 15 Jul 2007 23:39:50 -0700
Subject: fuse warning fix

gcc-4.3:

fs/fuse/dir.c: In function 'parse_dirfile':
fs/fuse/dir.c:833: warning: cast from pointer to integer of different size
fs/fuse/dir.c:835: warning: cast from pointer to integer of different size

[miklos@szeredi.hu: use offsetof]
Acked-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fuse.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 534744efe30d..9fbe9d258e22 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -339,7 +339,7 @@ struct fuse_dirent {
 	char name[0];
 };
 
-#define FUSE_NAME_OFFSET ((unsigned) ((struct fuse_dirent *) 0)->name)
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
-- 
cgit v1.2.3


From c67ad917cbf21b2862e2cf8e8b28339872ef7927 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 15 Jul 2007 23:39:51 -0700
Subject: percpu_counters(): use cpu notifiers

per-cpu counters presently must iterate over all possible CPUs in the
exhaustive percpu_counter_sum().

But it can be much better to only iterate over the presently-online CPUs.  To
do this, we must arrange for an offlined CPU's count to be spilled into the
counter's central count.

We can do this for all percpu_counters in the machine by linking them into a
single global list and walking that list at CPU_DEAD time.

(I hope.  Might have race windows in which the percpu_counter_sum() count is
inaccurate?)

Cc: Gautham R Shenoy <ego@in.ibm.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu_counter.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index f5aa593ccf32..3d9f70972cdf 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -8,6 +8,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/list.h>
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
@@ -17,6 +18,9 @@
 struct percpu_counter {
 	spinlock_t lock;
 	s64 count;
+#ifdef CONFIG_HOTPLUG_CPU
+	struct list_head list;	/* All percpu_counters are on a list */
+#endif
 	s32 *counters;
 };
 
@@ -26,18 +30,8 @@ struct percpu_counter {
 #define FBC_BATCH	(NR_CPUS*4)
 #endif
 
-static inline void percpu_counter_init(struct percpu_counter *fbc, s64 amount)
-{
-	spin_lock_init(&fbc->lock);
-	fbc->count = amount;
-	fbc->counters = alloc_percpu(s32);
-}
-
-static inline void percpu_counter_destroy(struct percpu_counter *fbc)
-{
-	free_percpu(fbc->counters);
-}
-
+void percpu_counter_init(struct percpu_counter *fbc, s64 amount);
+void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_mod(struct percpu_counter *fbc, s32 amount);
 s64 percpu_counter_sum(struct percpu_counter *fbc);
 
-- 
cgit v1.2.3


From 9aacd599342fdfc1fb9422f37e900609b7a46249 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Sun, 15 Jul 2007 23:39:56 -0700
Subject: fat: gcc 4.3 warning fix

This patch fixes the following warnings.

fs/fat/dir.c: In function 'fat_parse_long':
include/linux/msdos_fs.h:294: warning: array subscript is above array bounds
include/linux/msdos_fs.h:295: warning: array subscript is above array bounds
include/linux/msdos_fs.h:295: warning: array subscript is above array bounds

The ->name is defined as "name[8], ext[3]", but fat_checksum() uses
those as name[11]. There is no actual problem, but it's not good practice.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msdos_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 0e09c005dda8..f950921523f5 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -146,7 +146,7 @@ struct fat_boot_fsinfo {
 };
 
 struct msdos_dir_entry {
-	__u8	name[8],ext[3];	/* name and extension */
+	__u8	name[MSDOS_NAME];/* name and extension */
 	__u8	attr;		/* attribute bits */
 	__u8    lcase;		/* Case for base and extension */
 	__u8	ctime_cs;	/* Creation time, centiseconds (0-199) */
-- 
cgit v1.2.3


From 0a3021f4e249fbdb5f30d614707b5e02022e4c9b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:39:57 -0700
Subject: Remove unnecessary includes of spinlock.h under include/linux

Remove the obviously unnecessary includes of <linux/spinlock.h> under the
include/linux/ directory, and fix the couple errors that are introduced as
a result of that.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/attribute_container.h | 1 -
 include/linux/capability.h          | 1 -
 include/linux/console.h             | 1 -
 include/linux/ds17287rtc.h          | 1 -
 include/linux/ipc.h                 | 1 +
 include/linux/leds.h                | 1 -
 include/linux/module.h              | 1 -
 include/linux/percpu.h              | 2 +-
 include/linux/scx200_gpio.h         | 2 --
 include/linux/signal.h              | 1 -
 include/linux/smp_lock.h            | 1 -
 include/linux/timer.h               | 1 -
 12 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h
index 93bfb0beb62a..8ff274933948 100644
--- a/include/linux/attribute_container.h
+++ b/include/linux/attribute_container.h
@@ -12,7 +12,6 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/klist.h>
-#include <linux/spinlock.h>
 
 struct attribute_container {
 	struct list_head	node;
diff --git a/include/linux/capability.h b/include/linux/capability.h
index bbf8df7de28f..2dfa58555934 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -44,7 +44,6 @@ typedef struct __user_cap_data_struct {
   
 #ifdef __KERNEL__
 
-#include <linux/spinlock.h>
 #include <asm/current.h>
 
 /* #define STRICT_CAP_T_TYPECHECKS */
diff --git a/include/linux/console.h b/include/linux/console.h
index c44d3dfde7a5..56a7bcda49cb 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -15,7 +15,6 @@
 #define _LINUX_CONSOLE_H_ 1
 
 #include <linux/types.h>
-#include <linux/spinlock.h>
 
 struct vc_data;
 struct console_font_op;
diff --git a/include/linux/ds17287rtc.h b/include/linux/ds17287rtc.h
index c281ba42e28f..d85d3f497b96 100644
--- a/include/linux/ds17287rtc.h
+++ b/include/linux/ds17287rtc.h
@@ -11,7 +11,6 @@
 #define __LINUX_DS17287RTC_H
 
 #include <linux/rtc.h>			/* get the user-level API */
-#include <linux/spinlock.h>		/* spinlock_t */
 #include <linux/mc146818rtc.h>
 
 /* Register A */
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 1980867a64a4..7c8c6d8d090c 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -52,6 +52,7 @@ struct ipc_perm
 #ifdef __KERNEL__
 
 #include <linux/kref.h>
+#include <linux/spinlock.h>
 
 #define IPCMNI 32768  /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
 
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 88afceffb7cb..494bed7c2fc1 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -13,7 +13,6 @@
 #define __LINUX_LEDS_H_INCLUDED
 
 #include <linux/list.h>
-#include <linux/spinlock.h>
 
 struct device;
 struct class_device;
diff --git a/include/linux/module.h b/include/linux/module.h
index e6e0f86ef5fc..b6a646cea1cb 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -6,7 +6,6 @@
  * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
  * Rewritten again by Rusty Russell, 2002
  */
-#include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/stat.h>
 #include <linux/compiler.h>
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index b72be2f79e6a..926adaae0f96 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,7 +1,7 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
 
-#include <linux/spinlock.h> /* For preempt_disable() */
+#include <linux/preempt.h>
 #include <linux/slab.h> /* For kmalloc() */
 #include <linux/smp.h>
 #include <linux/string.h> /* For memset() */
diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h
index 1a82d30c4b17..d2b058130eb1 100644
--- a/include/linux/scx200_gpio.h
+++ b/include/linux/scx200_gpio.h
@@ -1,5 +1,3 @@
-#include <linux/spinlock.h>
-
 u32 scx200_gpio_configure(unsigned index, u32 set, u32 clear);
 
 extern unsigned scx200_gpio_base;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 9a5eac508e5e..ea91abe740da 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -6,7 +6,6 @@
 
 #ifdef __KERNEL__
 #include <linux/list.h>
-#include <linux/spinlock.h>
 
 /*
  * Real Time signals may be queued.
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index cf715a40d833..58962c51dee1 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,7 +3,6 @@
 
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
-#include <linux/spinlock.h>
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
diff --git a/include/linux/timer.h b/include/linux/timer.h
index c661710d3627..2b59e6d4219c 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -3,7 +3,6 @@
 
 #include <linux/list.h>
 #include <linux/ktime.h>
-#include <linux/spinlock.h>
 #include <linux/stddef.h>
 
 struct tvec_t_base_s;
-- 
cgit v1.2.3


From e8d6c554126b830217c5e9f549e0e21f865a0a8a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 15 Jul 2007 23:40:12 -0700
Subject: AFS: implement file locking

Implement file locking for AFS.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51c938a71dec..aa4530c1ff7a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -820,6 +820,10 @@ struct file_lock {
 	union {
 		struct nfs_lock_info	nfs_fl;
 		struct nfs4_lock_info	nfs4_fl;
+		struct {
+			struct list_head link;	/* link in AFS vnode's pending_locks list */
+			int state;		/* state of grant or error if -ve */
+		} afs;
 	} fl_u;
 };
 
-- 
cgit v1.2.3


From 9e7bf24b1b979db256ddc84d0d4ac6040d706da6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 15 Jul 2007 23:40:25 -0700
Subject: fs: clarify "dummy" member in struct inodes_stat_t

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa4530c1ff7a..e68780810279 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -44,7 +44,7 @@ extern int get_max_files(void);
 struct inodes_stat_t {
 	int nr_inodes;
 	int nr_unused;
-	int dummy[5];
+	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
 extern struct inodes_stat_t inodes_stat;
 
-- 
cgit v1.2.3


From 1b0fac45878bb88759eec347c273285195649ff7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 15 Jul 2007 23:40:26 -0700
Subject: dma-mapping: prevent dma dependent code from linking on !HAS_DMA
 archs

Continuing the work started in 411f0f3edc141a582190d3605cadd1d993abb6df ...

This enables code with a dma path, that compiles away, to build without
requiring additional code factoring.  It also prevents code that calls
dma_alloc_coherent and dma_free_coherent from linking whereas previously
the code would hit a BUG() at run time.  Finally, it allows archs that set
!HAS_DMA to delete their asm/dma-mapping.h file.

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: James Bottomley <James.Bottomley@SteelEye.com>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: <geert@linux-m68k.org>
Cc: <zippel@linux-m68k.org>
Cc: <spyro@f2s.com>
Cc: <ysato@users.sourceforge.jp>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 9a663c6db16a..2dc21cbeb304 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -31,7 +31,11 @@ static inline int valid_dma_direction(int dma_direction)
 		(dma_direction == DMA_FROM_DEVICE));
 }
 
+#ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
+#else
+#include <asm-generic/dma-mapping-broken.h>
+#endif
 
 /* Backwards compat, remove in 2.7.x */
 #define dma_sync_single		dma_sync_single_for_cpu
-- 
cgit v1.2.3


From e0807061908a7a9441d0f745deb444f7216904cb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 15 Jul 2007 23:40:30 -0700
Subject: remove odd and misleading comments from uio.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uio.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 9af8bbcd8963..b7fe13883bdb 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -13,10 +13,6 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-
-/* A word of warning: Our uio structure will clash with the C library one (which is now obsolete). Remove the C
-   library one from sys/uio.h if you have a very old library set */
-
 struct iovec
 {
 	void __user *iov_base;	/* BSD uses caddr_t (1003.1g requires void *) */
@@ -38,11 +34,6 @@ struct kvec {
  
 #define UIO_FASTIOV	8
 #define UIO_MAXIOV	1024
-#if 0
-#define UIO_MAXIOV	16	/* Maximum iovec's in one operation 
-				   16 matches BSD */
-                                /* Beg pardon: BSD has 1024 --ANK */
-#endif
 
 /*
  * Total number of bytes covered by an iovec.
-- 
cgit v1.2.3


From c5c061b8f9726bc2c25e19dec227933a13d1e6b7 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun, 15 Jul 2007 23:40:30 -0700
Subject: Add a flag to indicate deferrable timers in /proc/timer_stats

Add a flag in /proc/timer_stats to indicate deferrable timers.  This will
let developers/users differentiate between types of timers in
/proc/timer_stats.

Deferrable timer and normal timer will appear in /proc/timer_stats as below.
  10D,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
   10,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)

Also, the version of timer_stats changes from v0.1 to v0.2.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hrtimer.h |  5 +++--
 include/linux/timer.h   | 15 ++++-----------
 2 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 17c29dca8354..540799bc85f8 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -329,12 +329,13 @@ extern void sysrq_timer_list_show(void);
 #ifdef CONFIG_TIMER_STATS
 
 extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char * comm);
+				     void *timerf, char *comm,
+				     unsigned int timer_flag);
 
 static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
 {
 	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm);
+				 timer->function, timer->start_comm, 0);
 }
 
 extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 2b59e6d4219c..78cf899b4409 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -90,16 +90,13 @@ extern unsigned long get_next_timer_interrupt(unsigned long now);
  */
 #ifdef CONFIG_TIMER_STATS
 
+#define TIMER_STATS_FLAG_DEFERRABLE	0x1
+
 extern void init_timer_stats(void);
 
 extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char * comm);
-
-static inline void timer_stats_account_timer(struct timer_list *timer)
-{
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm);
-}
+				     void *timerf, char *comm,
+				     unsigned int timer_flag);
 
 extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
 					       void *addr);
@@ -118,10 +115,6 @@ static inline void init_timer_stats(void)
 {
 }
 
-static inline void timer_stats_account_timer(struct timer_list *timer)
-{
-}
-
 static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
 {
 }
-- 
cgit v1.2.3


From 4a19542e5f694cd408a32c3d9dc593ba9366e2d7 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Sun, 15 Jul 2007 23:40:34 -0700
Subject: O_CLOEXEC for SCM_RIGHTS

Part two in the O_CLOEXEC saga: adding support for file descriptors received
through Unix domain sockets.

The patch is once again pretty minimal, it introduces a new flag for recvmsg
and passes it just like the existing MSG_CMSG_COMPAT flag.  I think this bit
is not used otherwise but the networking people will know better.

This new flag is not recognized by recvfrom and recv.  These functions cannot
be used for that purpose and the asymmetry this introduces is not worse than
the already existing MSG_CMSG_COMPAT situations.

The patch must be applied on top of the patch which introduced O_CLOEXEC.  It
has to remove static from the new get_unused_fd_flags function, but since scm.c
cannot live in a module the function still does not have to be exported.

Here's a test program to make sure the code works.  It's so much longer than
the actual patch...

#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC 0x40000000
#endif

/* Test driver for MSG_CMSG_CLOEXEC.

   Without arguments: fork a server process that sends a descriptor over a
   Unix domain socket as SCM_RIGHTS ancillary data, receive it with
   MSG_CMSG_CLOEXEC, then re-exec this program with the received descriptor
   number as argv[1].

   With an argument: check that the descriptor named by argv[1] is NOT open
   in this (exec'ed) process.  Returns 0 if it is closed, 1 otherwise.  */
int
main (int argc, char *argv[])
{
  if (argc > 1)
    {
      /* Parse strictly; a malformed argument must not be silently read
         as descriptor 0.  */
      char *end;
      errno = 0;
      long val = strtol (argv[1], &end, 10);
      if (end == argv[1] || *end != '\0' || errno == ERANGE
          || val < 0 || val > INT_MAX)
        error (1, 0, "invalid descriptor argument");

      int fd = (int) val;
      printf ("child: fd = %d\n", fd);

      /* Any successful F_GETFD, whatever flag word it returns, means the
         descriptor survived the exec.  Only -1 with EBADF shows it was
         closed, so reset errno to avoid acting on a stale value.  */
      errno = 0;
      if (fcntl (fd, F_GETFD) != -1 || errno != EBADF)
        {
          puts ("file descriptor valid in child");
          return 1;
        }
      return 0;

    }

  /* Zero the whole address so no uninitialized bytes are handed to
     bind/connect along with sizeof (sun).  */
  struct sockaddr_un sun = { .sun_family = AF_UNIX };
  strcpy (sun.sun_path, "./testsocket");

  char databuf[] = "hello";
  struct iovec iov[1];
  iov[0].iov_base = databuf;
  iov[0].iov_len = sizeof (databuf);

  /* The union gives the control buffer the alignment of a cmsghdr.  */
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
                        .msg_control = buf.bytes,
                        .msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  msg.msg_controllen = cmsg->cmsg_len;

  pid_t child = fork ();
  if (child == -1)
    error (1, errno, "fork");
  if (child == 0)
    {
      /* Forked process: act as the server and send one descriptor.  */
      int sock = socket (PF_UNIX, SOCK_STREAM, 0);
      if (sock < 0)
        error (1, errno, "socket");

      if (bind (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
        error (1, errno, "bind");
      if (listen (sock, SOMAXCONN) < 0)
        error (1, errno, "listen");

      int conn = accept (sock, NULL, NULL);
      if (conn == -1)
        error (1, errno, "accept");

      /* The listening socket itself is the descriptor that gets passed.  */
      *(int *) CMSG_DATA (cmsg) = sock;
      if (sendmsg (conn, &msg, MSG_NOSIGNAL) < 0)
        error (1, errno, "sendmsg");

      return 0;
    }

  /* For a test suite this should be more robust like a
     barrier in shared memory.  */
  sleep (1);

  int sock = socket (PF_UNIX, SOCK_STREAM, 0);
  if (sock < 0)
    error (1, errno, "socket");

  if (connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
    error (1, errno, "connect");
  unlink (sun.sun_path);

  /* Sentinel: still -1 after recvmsg means nothing was delivered.  */
  *(int *) CMSG_DATA (cmsg) = -1;

  if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0)
    error (1, errno, "recvmsg");

  int fd = *(int *) CMSG_DATA (cmsg);
  if (fd == -1)
    error (1, 0, "no descriptor received");

  /* Re-exec ourselves; the new image checks that fd did not survive.  */
  char fdname[20];
  snprintf (fdname, sizeof (fdname), "%d", fd);
  execl ("/proc/self/exe", argv[0], fdname, (char *) NULL);
  puts ("execl failed");
  return 1;
}

[akpm@linux-foundation.org: Fix fastcall inconsistency noted by Michael Buesch]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/file.h   | 1 +
 include/linux/socket.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/file.h b/include/linux/file.h
index a59001e9ea58..0114fbc78061 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -73,6 +73,7 @@ extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed));
 extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag));
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
+extern int get_unused_fd_flags(int flags);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
 struct kmem_cache;
 
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fe195c97a89d..f852e1afd65a 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -253,6 +253,9 @@ struct ucred {
 
 #define MSG_EOF         MSG_FIN
 
+#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
+					   descriptor received through
+					   SCM_RIGHTS */
 #if defined(CONFIG_COMPAT)
 #define MSG_CMSG_COMPAT	0x80000000	/* This message needs 32 bit fixups */
 #else
-- 
cgit v1.2.3


From aa0ac36518be648dda3a32f0b37a8b2b546e1b24 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 15 Jul 2007 23:40:39 -0700
Subject: Remove capability.h from mm.h

I forgot to remove capability.h from mm.h while removing sched.h!  This
patch remedies that, because the only inline function which was using
CAP_something was made out of line.

Cross-compile tested without regressions on:

	all powerpc defconfigs
	all mips defconfigs
	all m68k defconfigs
	all arm defconfigs
	all ia64 defconfigs

	alpha alpha-allnoconfig alpha-defconfig alpha-up
	arm
	i386 i386-allnoconfig i386-defconfig i386-up
	ia64 ia64-allnoconfig ia64-defconfig ia64-up
	m68k
	mips
	parisc parisc-allnoconfig parisc-defconfig parisc-up
	powerpc powerpc-up
	s390 s390-allnoconfig s390-defconfig s390-up
	sparc sparc-allnoconfig sparc-defconfig sparc-up
	sparc64 sparc64-allnoconfig sparc64-defconfig sparc64-up
	um-x86_64
	x86_64 x86_64-allnoconfig x86_64-defconfig x86_64-up

as well as my two usual configs.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d4ab4e590e23..97d0cddfd223 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2,7 +2,6 @@
 #define _LINUX_MM_H
 
 #include <linux/errno.h>
-#include <linux/capability.h>
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From 759448f459234bfcf34b82471f0dba77a9aca498 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@linux01.gwdg.de>
Date: Sun, 15 Jul 2007 23:40:40 -0700
Subject: Kernel utf-8 handling

This patch fixes dead keys and copy/paste of non-ASCII characters in UTF-8
mode on Linux console.  See more details about the original patch at:
http://chris.heathens.co.nz/linux/utf8.html

Already posted on
	(Oldest) http://lkml.org/lkml/2003/5/31/148
	         http://lkml.org/lkml/2005/12/24/69
	(Recent) http://lkml.org/lkml/2006/8/7/75

[bunk@stusta.de: make drivers/char/selection.c:store_utf8() static]
Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Cc: Alexander E. Patrakov <patrakov@ums.usu.ru>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/consolemap.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 82c9a1f11020..06b2768c603f 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -8,9 +8,12 @@
 #define IBMPC_MAP 2
 #define USER_MAP 3
 
+#include <linux/types.h>
+
 struct vc_data;
 
-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
 extern unsigned short *set_translate(int m, struct vc_data *vc);
 extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
+extern u32 conv_8bit_to_uni(unsigned char c);
 void console_map_init(void);
-- 
cgit v1.2.3


From c289dca37917338fc8ab2e0d7e202a1c927e229e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 15 Jul 2007 23:40:42 -0700
Subject: remove sonypi_camera_command()

Remove the no longer used sonypi_camera_command().

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sonypi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h
index 34d4b075f7b8..40c7b5d993b9 100644
--- a/include/linux/sonypi.h
+++ b/include/linux/sonypi.h
@@ -153,8 +153,6 @@
 #define SONYPI_COMMAND_GETCAMERAROMVERSION	18	/* obsolete */
 #define SONYPI_COMMAND_GETCAMERAREVISION	19	/* obsolete */
 
-int sonypi_camera_command(int command, u8 value);
-
 #endif				/* __KERNEL__ */
 
 #endif				/* _SONYPI_H_ */
-- 
cgit v1.2.3


From dcae56ea661e13d8f904b584bbe4c1e50c7ee548 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:40:45 -0700
Subject: Drop an empty isicom.h from being exported to user space.

Drop <linux/isicom.h> from being exported to user space since it would
be only an empty file.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Acked-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 127d2d192b5a..bcf875e844fe 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -247,7 +247,6 @@ unifdef-y += isdn.h
 unifdef-y += isdnif.h
 unifdef-y += isdn_divertif.h
 unifdef-y += isdn_ppp.h
-unifdef-y += isicom.h
 unifdef-y += jbd.h
 unifdef-y += joystick.h
 unifdef-y += kdev_t.h
-- 
cgit v1.2.3


From b663a79c191508f27cd885224b592a878c0ba0f6 Mon Sep 17 00:00:00 2001
From: Maxim Uvarov <muvarov@ru.mvista.com>
Date: Sun, 15 Jul 2007 23:40:48 -0700
Subject: taskstats: add context-switch counters

Make available to the user the following task and process performance
statistics:

	* Involuntary Context Switches (task_struct->nivcsw)
	* Voluntary Context Switches (task_struct->nvcsw)

Statistics information is available from:
	1. taskstats interface (Documentation/accounting/)
	2. /proc/PID/status (task only).

This data is useful for detecting hyperactivity patterns between processes.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Cc: Jonathan Lim <jlim@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/taskstats.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index a46104a28f66..dce1ed204972 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	4
+#define TASKSTATS_VERSION	5
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -149,6 +149,9 @@ struct taskstats {
 	__u64	read_bytes;		/* bytes of read I/O */
 	__u64	write_bytes;		/* bytes of write I/O */
 	__u64	cancelled_write_bytes;	/* bytes of cancelled write I/O */
+
+	__u64  nvcsw;			/* voluntary_ctxt_switches */
+	__u64  nivcsw;			/* nonvoluntary_ctxt_switches */
 };
 
 
-- 
cgit v1.2.3


From 5216184571946b8bbf06f0cd630c7754190fdd1a Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sun, 15 Jul 2007 23:40:51 -0700
Subject: fix typo in prefetch.h

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prefetch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index fc86f274147f..1adfe668d031 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -27,7 +27,7 @@
 	
 	prefetch(x)  	- prefetches the cacheline at "x" for read
 	prefetchw(x)	- prefetches the cacheline at "x" for write
-	spin_lock_prefetch(x) - prefectches the spinlock *x for taking
+	spin_lock_prefetch(x) - prefetches the spinlock *x for taking
 	
 	there is also PREFETCH_STRIDE which is the architecure-prefered 
 	"lookahead" size for prefetching streamed operations.
-- 
cgit v1.2.3


From 4f27c00bf80f122513d3a5be16ed851573164534 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sun, 15 Jul 2007 23:40:55 -0700
Subject: Improve behaviour of spurious IRQ detect

Currently we handle spurious IRQ activity based upon seeing a lot of
invalid interrupts, and we clear things back on the basis of lots of valid
interrupts.

Unfortunately in some cases you get legitimate invalid interrupts caused by
timing asynchronicity between the PCI bus and the APIC bus when disabling
interrupts and pulling other tricks.  In this case although the spurious
IRQs are not a problem our unhandled counters didn't clear and they act as
a slow running timebomb.  (This is effectively what showed up as the serial
port/tty problem that was fixed by clearing counters when registering a
handler.)

It's easy enough to add a second parameter - time.  This means that if we
see a regular stream of harmless spurious interrupts which are not harming
processing we don't go off and do something stupid like disable the IRQ
after a month of running.  OTOH lockups and performance killers show up a
lot more often than 10/second.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1695054e8c63..44657197fcb0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -161,6 +161,7 @@ struct irq_desc {
 	unsigned int		wake_depth;	/* nested wake enables */
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned int		irqs_unhandled;
+	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_t		affinity;
-- 
cgit v1.2.3


From 522ed7767e800cff6c650ec64b0ee0677303119c Mon Sep 17 00:00:00 2001
From: Miloslav Trmac <mitr@redhat.com>
Date: Sun, 15 Jul 2007 23:40:56 -0700
Subject: Audit: add TTY input auditing

Add TTY input auditing, used to audit system administrator's actions.  This is
required by various security standards such as DCID 6/3 and PCI to provide
non-repudiation of administrator's actions and to allow a review of past
actions if the administrator seems to overstep their duties or if the system
becomes misconfigured for unknown reasons.  These requirements do not make it
necessary to audit TTY output as well.

Compared to a user-space keylogger, this approach records TTY input using the
audit subsystem, correlated with other audit events, and it is completely
transparent to the user-space application (e.g.  the console ioctls still
work).

TTY input auditing works on a higher level than auditing all system calls
within the session, which would produce an overwhelming amount of mostly
useless audit events.

Add an "audit_tty" attribute, inherited across fork ().  Data read from TTYs
by process with the attribute is sent to the audit subsystem by the kernel.
The audit netlink interface is extended to allow modifying the audit_tty
attribute, and to allow sending explanatory audit events from user-space (for
example, a shell might send an event containing the final command, after the
interactive command-line editing and history expansion is performed, which
might be difficult to decipher from the TTY input alone).

Because the "audit_tty" attribute is inherited across fork (), it would be set
e.g.  for sshd restarted within an audited session.  To prevent this, the
audit_tty attribute is cleared when a process with no open TTY file
descriptors (e.g.  after daemon startup) opens a TTY.

See https://www.redhat.com/archives/linux-audit/2007-June/msg00000.html for a
more detailed rationale document for an older version of this patch.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Miloslav Trmac <mitr@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Paul Fulghum <paulkf@microgate.com>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h | 11 +++++++++++
 include/linux/sched.h |  4 ++++
 include/linux/tty.h   | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index fccc6e50298a..8ca7ca0b47f0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -63,9 +63,12 @@
 #define AUDIT_ADD_RULE		1011	/* Add syscall filtering rule */
 #define AUDIT_DEL_RULE		1012	/* Delete syscall filtering rule */
 #define AUDIT_LIST_RULES	1013	/* List syscall filtering rules */
+#define AUDIT_TTY_GET		1014	/* Get TTY auditing status */
+#define AUDIT_TTY_SET		1015	/* Set TTY auditing status */
 
 #define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC		1107	/* We filter this differently */
+#define AUDIT_USER_TTY		1124	/* Non-ICANON TTY input meaning */
 #define AUDIT_LAST_USER_MSG	1199
 #define AUDIT_FIRST_USER_MSG2	2100	/* More user space messages */
 #define AUDIT_LAST_USER_MSG2	2999
@@ -92,6 +95,7 @@
 #define AUDIT_KERNEL_OTHER	1316	/* For use by 3rd party modules */
 #define AUDIT_FD_PAIR		1317    /* audit record for pipe/socketpair */
 #define AUDIT_OBJ_PID		1318	/* ptrace target */
+#define AUDIT_TTY		1319	/* Input on an administrative TTY */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -289,6 +293,10 @@ struct audit_status {
 	__u32		backlog;	/* messages waiting in queue */
 };
 
+struct audit_tty_status {
+	__u32		enabled; /* 1 = enabled, 0 = disabled */
+};
+
 /* audit_rule_data supports filter rules with both integer and string
  * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
  * AUDIT_LIST_RULES requests.
@@ -515,11 +523,13 @@ extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const char *prefix,
 					     struct dentry *dentry,
 					     struct vfsmount *vfsmnt);
+extern void		    audit_log_lost(const char *message);
 				/* Private API (for audit.c only) */
 extern int audit_filter_user(struct netlink_skb_parms *cb, int type);
 extern int audit_filter_type(int type);
 extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 			 void *data, size_t datasz, uid_t loginuid, u32 sid);
+extern int audit_enabled;
 #else
 #define audit_log(c,g,t,f,...) do { ; } while (0)
 #define audit_log_start(c,g,t) ({ NULL; })
@@ -530,6 +540,7 @@ extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
 #define audit_log_d_path(b,p,d,v) do { ; } while (0)
+#define audit_enabled 0
 #endif
 #endif
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cffc1204663..b579624477f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -529,6 +529,10 @@ struct signal_struct {
 #ifdef CONFIG_TASKSTATS
 	struct taskstats *stats;
 #endif
+#ifdef CONFIG_AUDIT
+	unsigned audit_tty;
+	struct tty_audit_buf *tty_audit_buf;
+#endif
 };
 
 /* Context switch must be unlocked if interrupts are to be enabled */
diff --git a/include/linux/tty.h b/include/linux/tty.h
index deaba9ec5004..691a1748d9d2 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -178,6 +178,7 @@ struct tty_bufhead {
 #define L_IEXTEN(tty)	_L_FLAG((tty),IEXTEN)
 
 struct device;
+struct signal_struct;
 /*
  * Where all of the state associated with a tty is kept while the tty
  * is open.  Since the termios state should be kept even if the tty
@@ -310,6 +311,7 @@ extern void tty_hangup(struct tty_struct * tty);
 extern void tty_vhangup(struct tty_struct * tty);
 extern void tty_unhangup(struct file *filp);
 extern int tty_hung_up_p(struct file * filp);
+extern int is_tty(struct file *filp);
 extern void do_SAK(struct tty_struct *tty);
 extern void __do_SAK(struct tty_struct *tty);
 extern void disassociate_ctty(int priv);
@@ -347,6 +349,37 @@ extern int tty_write_lock(struct tty_struct *tty, int ndelay);
 /* n_tty.c */
 extern struct tty_ldisc tty_ldisc_N_TTY;
 
+/* tty_audit.c */
+#ifdef CONFIG_AUDIT
+extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+			       size_t size);
+extern void tty_audit_exit(void);
+extern void tty_audit_fork(struct signal_struct *sig);
+extern void tty_audit_push(struct tty_struct *tty);
+extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid);
+extern void tty_audit_opening(void);
+#else
+static inline void tty_audit_add_data(struct tty_struct *tty,
+				      unsigned char *data, size_t size)
+{
+}
+static inline void tty_audit_exit(void)
+{
+}
+static inline void tty_audit_fork(struct signal_struct *sig)
+{
+}
+static inline void tty_audit_push(struct tty_struct *tty)
+{
+}
+static inline void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid)
+{
+}
+static inline void tty_audit_opening(void)
+{
+}
+#endif
+
 /* tty_ioctl.c */
 extern int n_tty_ioctl(struct tty_struct * tty, struct file * file,
 		       unsigned int cmd, unsigned long arg);
-- 
cgit v1.2.3


From 7d69a1f4a72b18876c99c697692b78339d491568 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Sun, 15 Jul 2007 23:40:58 -0700
Subject: remove CONFIG_UTS_NS and CONFIG_IPC_NS

CONFIG_UTS_NS and CONFIG_IPC_NS have very little value as they only
deactivate the unshare of the uts and ipc namespaces and do not improve
performance.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Acked-by: "Serge E. Hallyn" <serue@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc.h     | 11 +++--------
 include/linux/utsname.h | 13 -------------
 2 files changed, 3 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 7c8c6d8d090c..3fd3ddd5f0d9 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -93,6 +93,7 @@ extern struct ipc_namespace init_ipc_ns;
 
 #ifdef CONFIG_SYSVIPC
 #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
+extern void free_ipc_ns(struct kref *kref);
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
 						struct ipc_namespace *ns);
 #else
@@ -104,13 +105,9 @@ static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
 }
 #endif
 
-#ifdef CONFIG_IPC_NS
-extern void free_ipc_ns(struct kref *kref);
-#endif
-
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
-#ifdef CONFIG_IPC_NS
+#ifdef CONFIG_SYSVIPC
 	if (ns)
 		kref_get(&ns->kref);
 #endif
@@ -119,7 +116,7 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 
 static inline void put_ipc_ns(struct ipc_namespace *ns)
 {
-#ifdef CONFIG_IPC_NS
+#ifdef CONFIG_SYSVIPC
 	kref_put(&ns->kref, free_ipc_ns);
 #endif
 }
@@ -127,5 +124,3 @@ static inline void put_ipc_ns(struct ipc_namespace *ns)
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
-
-
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index f8d3b326e93a..51ad167611e4 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -48,7 +48,6 @@ static inline void get_uts_ns(struct uts_namespace *ns)
 	kref_get(&ns->kref);
 }
 
-#ifdef CONFIG_UTS_NS
 extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns);
 extern void free_uts_ns(struct kref *kref);
 
@@ -56,18 +55,6 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 {
 	kref_put(&ns->kref, free_uts_ns);
 }
-#else
-static inline struct uts_namespace *copy_utsname(int flags,
-						struct uts_namespace *ns)
-{
-	return ns;
-}
-
-static inline void put_uts_ns(struct uts_namespace *ns)
-{
-}
-#endif
-
 static inline struct new_utsname *utsname(void)
 {
 	return &current->nsproxy->uts_ns->name;
-- 
cgit v1.2.3


From acce292c82d4d82d35553b928df2b0597c3a9c78 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Sun, 15 Jul 2007 23:40:59 -0700
Subject: user namespace: add the framework

Basically, it will allow a process to unshare its user_struct table,
resetting at the same time its own user_struct and all the associated
accounting.

A new root user (uid == 0) is added to the user namespace upon creation.
Such root users have full privileges and it seems that these privileges
should be controlled through some means (process capabilities ?)

The unshare is not included in this patch.

Changes since [try #4]:
	- Updated get_user_ns and put_user_ns to accept NULL, and
	  get_user_ns to return the namespace.

Changes since [try #3]:
	- moved struct user_namespace to files user_namespace.{c,h}

Changes since [try #2]:
	- removed struct user_namespace* argument from find_user()

Changes since [try #1]:
	- removed struct user_namespace* argument from find_user()
	- added a root_user per user namespace

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Pavel Emelianov <xemul@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: Andrew Morgan <agm@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init_task.h      |  2 ++
 include/linux/nsproxy.h        |  1 +
 include/linux/sched.h          |  3 ++-
 include/linux/user_namespace.h | 57 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/user_namespace.h

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 276ccaa2670c..cab741c2d603 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -8,6 +8,7 @@
 #include <linux/lockdep.h>
 #include <linux/ipc.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
 	.uts_ns		= &init_uts_ns,					\
 	.mnt_ns		= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
+	.user_ns	= &init_user_ns,				\
 }
 
 #define INIT_SIGHAND(sighand) {						\
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 189e0dc993ab..6d179a397bfb 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -28,6 +28,7 @@ struct nsproxy {
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
 	struct pid_namespace *pid_ns;
+	struct user_namespace *user_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b579624477f4..c667255d70db 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -287,6 +287,7 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
 
 struct nsproxy;
+struct user_namespace;
 
 /* Maximum number of active map areas.. This is a random (large) number */
 #define DEFAULT_MAX_MAP_COUNT	65536
@@ -1408,7 +1409,7 @@ extern struct task_struct *find_task_by_pid_type(int type, int pid);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(struct user_namespace *, uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
 	atomic_inc(&u->__count);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
new file mode 100644
index 000000000000..92a45867ecfb
--- /dev/null
+++ b/include/linux/user_namespace.h
@@ -0,0 +1,57 @@
+#ifndef _LINUX_USER_NAMESPACE_H
+#define _LINUX_USER_NAMESPACE_H
+
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+
+#define UIDHASH_BITS	(CONFIG_BASE_SMALL ? 3 : 8)
+#define UIDHASH_SZ	(1 << UIDHASH_BITS)
+
+struct user_namespace {
+	struct kref		kref;
+	struct list_head	uidhash_table[UIDHASH_SZ];
+	struct user_struct	*root_user;
+};
+
+extern struct user_namespace init_user_ns;
+
+#ifdef CONFIG_USER_NS
+
+static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+	return ns;
+}
+
+extern struct user_namespace *copy_user_ns(int flags,
+					   struct user_namespace *old_ns);
+extern void free_user_ns(struct kref *kref);
+
+static inline void put_user_ns(struct user_namespace *ns)
+{
+	if (ns)
+		kref_put(&ns->kref, free_user_ns);
+}
+
+#else
+
+static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+{
+	return &init_user_ns;
+}
+
+static inline struct user_namespace *copy_user_ns(int flags,
+						  struct user_namespace *old_ns)
+{
+	return NULL;
+}
+
+static inline void put_user_ns(struct user_namespace *ns)
+{
+}
+
+#endif
+
+#endif /* _LINUX_USER_NAMESPACE_H */
-- 
cgit v1.2.3


From 77ec739d8d0979477fc91f530403805afa2581a4 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Sun, 15 Jul 2007 23:41:01 -0700
Subject: user namespace: add unshare

This patch enables the unshare of user namespaces.

It adds a new clone flag CLONE_NEWUSER and implements copy_user_ns() which
resets the current user_struct and adds a new root user (uid == 0)

For now, unsharing the user namespace allows a process to reset its
user_struct accounting and uid 0 in the new user namespace should be contained
using appropriate means, for instance SELinux.

The plan, when the full support is complete (all uid checks covered), is to
keep the original user's rights in the original namespace, and let a process
become uid 0 in the new namespace, with full capabilities to the new
namespace.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Acked-by: Pavel Emelianov <xemul@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: Andrew Morgan <agm@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h          | 1 +
 include/linux/user_namespace.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c667255d70db..731edaca8ffd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -26,6 +26,7 @@
 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
+#define CLONE_NEWUSER		0x10000000	/* New user namespace */
 
 /*
  * Scheduling policies
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 92a45867ecfb..bb320573bb9e 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -4,6 +4,7 @@
 #include <linux/kref.h>
 #include <linux/nsproxy.h>
 #include <linux/sched.h>
+#include <linux/err.h>
 
 #define UIDHASH_BITS	(CONFIG_BASE_SMALL ? 3 : 8)
 #define UIDHASH_SZ	(1 << UIDHASH_BITS)
@@ -45,6 +46,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 static inline struct user_namespace *copy_user_ns(int flags,
 						  struct user_namespace *old_ns)
 {
+	if (flags & CLONE_NEWUSER)
+		return ERR_PTR(-EINVAL);
+
 	return NULL;
 }
 
-- 
cgit v1.2.3


From ea5a3dcfda1c9140228f2842ea9b01e1713c559a Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:41:04 -0700
Subject: COBALT: remove all references to Cobalt NVRAM

Remove not only the references to Cobalt NVRAM, but the header file as
well.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Acked-by: Tim Hockin <thockin@hockin.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cobalt-nvram.h | 109 -------------------------------------------
 1 file changed, 109 deletions(-)
 delete mode 100644 include/linux/cobalt-nvram.h

(limited to 'include/linux')

diff --git a/include/linux/cobalt-nvram.h b/include/linux/cobalt-nvram.h
deleted file mode 100644
index ea429562ff36..000000000000
--- a/include/linux/cobalt-nvram.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * $Id: cobalt-nvram.h,v 1.20 2001/10/17 23:16:55 thockin Exp $
- * cobalt-nvram.h : defines for the various fields in the cobalt NVRAM
- *
- * Copyright 2001,2002 Sun Microsystems, Inc.
- */
-
-#ifndef COBALT_NVRAM_H
-#define COBALT_NVRAM_H
-
-#include <linux/nvram.h>
-
-#define COBT_CMOS_INFO_MAX		0x7f	/* top address allowed */
-#define COBT_CMOS_BIOS_DRIVE_INFO	0x12	/* drive info would go here */
-
-#define COBT_CMOS_CKS_START		NVRAM_OFFSET(0x0e)
-#define COBT_CMOS_CKS_END		NVRAM_OFFSET(0x7f)
-
-/* flag bytes - 16 flags for now, leave room for more */
-#define COBT_CMOS_FLAG_BYTE_0		NVRAM_OFFSET(0x10)
-#define COBT_CMOS_FLAG_BYTE_1		NVRAM_OFFSET(0x11)
-
-/* flags in flag bytes - up to 16 */
-#define COBT_CMOS_FLAG_MIN		0x0001
-#define COBT_CMOS_CONSOLE_FLAG		0x0001 /* console on/off */
-#define COBT_CMOS_DEBUG_FLAG		0x0002 /* ROM debug messages */
-#define COBT_CMOS_AUTO_PROMPT_FLAG	0x0004 /* boot to ROM prompt? */
-#define COBT_CMOS_CLEAN_BOOT_FLAG	0x0008 /* set by a clean shutdown */
-#define COBT_CMOS_HW_NOPROBE_FLAG	0x0010 /* go easy on the probing */
-#define COBT_CMOS_SYSFAULT_FLAG		0x0020 /* system fault detected */
-#define COBT_CMOS_OOPSPANIC_FLAG	0x0040 /* panic on oops */
-#define COBT_CMOS_DELAY_CACHE_FLAG	0x0080 /* delay cache initialization */
-#define COBT_CMOS_NOLOGO_FLAG		0x0100 /* hide "C" logo @ boot */
-#define COBT_CMOS_VERSION_FLAG		0x0200 /* the version field is valid */
-#define COBT_CMOS_FLAG_MAX		0x0200
-
-/* leave byte 0x12 blank - Linux looks for drive info here */
-
-/* CMOS structure version, valid if COBT_CMOS_VERSION_FLAG is true */
-#define COBT_CMOS_VERSION		NVRAM_OFFSET(0x13)
-#define COBT_CMOS_VER_BTOCODE		1 /* min. version needed for btocode */
-
-/* index of default boot method */
-#define COBT_CMOS_BOOT_METHOD		NVRAM_OFFSET(0x20)
-#define COBT_CMOS_BOOT_METHOD_DISK	0
-#define COBT_CMOS_BOOT_METHOD_ROM	1
-#define COBT_CMOS_BOOT_METHOD_NET	2
-
-#define COBT_CMOS_BOOT_DEV_MIN		NVRAM_OFFSET(0x21)
-/* major #, minor # of first through fourth boot device */
-#define COBT_CMOS_BOOT_DEV0_MAJ		NVRAM_OFFSET(0x21)
-#define COBT_CMOS_BOOT_DEV0_MIN		NVRAM_OFFSET(0x22)
-#define COBT_CMOS_BOOT_DEV1_MAJ		NVRAM_OFFSET(0x23)
-#define COBT_CMOS_BOOT_DEV1_MIN		NVRAM_OFFSET(0x24)
-#define COBT_CMOS_BOOT_DEV2_MAJ		NVRAM_OFFSET(0x25)
-#define COBT_CMOS_BOOT_DEV2_MIN		NVRAM_OFFSET(0x26)
-#define COBT_CMOS_BOOT_DEV3_MAJ		NVRAM_OFFSET(0x27)
-#define COBT_CMOS_BOOT_DEV3_MIN		NVRAM_OFFSET(0x28)
-#define COBT_CMOS_BOOT_DEV_MAX		NVRAM_OFFSET(0x28)
-
-/* checksum of bytes 0xe-0x7f */
-#define COBT_CMOS_CHECKSUM		NVRAM_OFFSET(0x2e)
-
-/* running uptime counter, units of 5 minutes (32 bits =~ 41000 years) */
-#define COBT_CMOS_UPTIME_0		NVRAM_OFFSET(0x30)
-#define COBT_CMOS_UPTIME_1		NVRAM_OFFSET(0x31)
-#define COBT_CMOS_UPTIME_2		NVRAM_OFFSET(0x32)
-#define COBT_CMOS_UPTIME_3		NVRAM_OFFSET(0x33)
-
-/* count of successful boots (32 bits) */
-#define COBT_CMOS_BOOTCOUNT_0		NVRAM_OFFSET(0x38)
-#define COBT_CMOS_BOOTCOUNT_1		NVRAM_OFFSET(0x39)
-#define COBT_CMOS_BOOTCOUNT_2		NVRAM_OFFSET(0x3a)
-#define COBT_CMOS_BOOTCOUNT_3		NVRAM_OFFSET(0x3b)
-
-/* 13 bytes: system serial number, same as on the back of the system */
-#define COBT_CMOS_SYS_SERNUM_LEN	13
-#define COBT_CMOS_SYS_SERNUM_0		NVRAM_OFFSET(0x40)
-#define COBT_CMOS_SYS_SERNUM_1		NVRAM_OFFSET(0x41)
-#define COBT_CMOS_SYS_SERNUM_2		NVRAM_OFFSET(0x42)
-#define COBT_CMOS_SYS_SERNUM_3		NVRAM_OFFSET(0x43)
-#define COBT_CMOS_SYS_SERNUM_4		NVRAM_OFFSET(0x44)
-#define COBT_CMOS_SYS_SERNUM_5		NVRAM_OFFSET(0x45)
-#define COBT_CMOS_SYS_SERNUM_6		NVRAM_OFFSET(0x46)
-#define COBT_CMOS_SYS_SERNUM_7		NVRAM_OFFSET(0x47)
-#define COBT_CMOS_SYS_SERNUM_8		NVRAM_OFFSET(0x48)
-#define COBT_CMOS_SYS_SERNUM_9		NVRAM_OFFSET(0x49)
-#define COBT_CMOS_SYS_SERNUM_10		NVRAM_OFFSET(0x4a)
-#define COBT_CMOS_SYS_SERNUM_11		NVRAM_OFFSET(0x4b)
-#define COBT_CMOS_SYS_SERNUM_12		NVRAM_OFFSET(0x4c)
-/* checksum for serial num - 1 byte */
-#define COBT_CMOS_SYS_SERNUM_CSUM	NVRAM_OFFSET(0x4f)
-
-#define COBT_CMOS_ROM_REV_MAJ		NVRAM_OFFSET(0x50)
-#define COBT_CMOS_ROM_REV_MIN		NVRAM_OFFSET(0x51)
-#define COBT_CMOS_ROM_REV_REV		NVRAM_OFFSET(0x52)
-
-#define COBT_CMOS_BTO_CODE_0		NVRAM_OFFSET(0x53)
-#define COBT_CMOS_BTO_CODE_1		NVRAM_OFFSET(0x54)
-#define COBT_CMOS_BTO_CODE_2		NVRAM_OFFSET(0x55)
-#define COBT_CMOS_BTO_CODE_3		NVRAM_OFFSET(0x56)
-
-#define COBT_CMOS_BTO_IP_CSUM		NVRAM_OFFSET(0x57)
-#define COBT_CMOS_BTO_IP_0		NVRAM_OFFSET(0x58)
-#define COBT_CMOS_BTO_IP_1		NVRAM_OFFSET(0x59)
-#define COBT_CMOS_BTO_IP_2		NVRAM_OFFSET(0x5a)
-#define COBT_CMOS_BTO_IP_3		NVRAM_OFFSET(0x5b)
-
-#endif /* COBALT_NVRAM_H */
-- 
cgit v1.2.3


From dcf5008db171211e3c34c060cacfd788306b034b Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Sun, 15 Jul 2007 23:41:09 -0700
Subject: remove unused lock_cpu_hotplug_interruptible definition

aa95387774039096c11803c04011f1aa42d85758 removed the implementation of
lock_cpu_hotplug_interruptible and all users of it.  This stub definition
for !CONFIG_HOTPLUG_CPU was left over -- kill it now.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 3b2df2523f1d..c2236bbff412 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -120,7 +120,6 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex)
 
 #define lock_cpu_hotplug()	do { } while (0)
 #define unlock_cpu_hotplug()	do { } while (0)
-#define lock_cpu_hotplug_interruptible() 0
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 #define register_hotcpu_notifier(nb)	do { (void)(nb); } while (0)
 #define unregister_hotcpu_notifier(nb)	do { (void)(nb); } while (0)
-- 
cgit v1.2.3


From f4895925976977aaeda26ee2a603a99f17db500b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:41:13 -0700
Subject: Remove final two references to "__obsolete_setup" macro

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 56ec4c62eee0..5b5285316339 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -171,9 +171,6 @@ struct obs_kernel_param {
 #define __setup(str, fn)					\
 	__setup_param(str, fn, fn, 0)
 
-#define __obsolete_setup(str)					\
-	__setup_null_param(str, __LINE__)
-
 /* NOTE: fn is as per module_param, not __setup!  Emits warning if fn
  * returns non-zero. */
 #define early_param(str, fn)					\
@@ -239,7 +236,6 @@ void __init parse_early_param(void);
 #define __setup_param(str, unique_id, fn)	/* nothing */
 #define __setup_null_param(str, unique_id) 	/* nothing */
 #define __setup(str, func) 			/* nothing */
-#define __obsolete_setup(str) 			/* nothing */
 #endif
 
 /* Data marked not to be saved by software suspend */
-- 
cgit v1.2.3


From 213dd266d48af90c1eec8688c1ff31aa34d21de2 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 15 Jul 2007 23:41:15 -0700
Subject: namespace: ensure clone_flags are always stored in an unsigned long

While working on unshare support for the network namespace I noticed we
were putting clone flags in an int.  Which is weird because the syscall
uses unsigned long and we at least need an unsigned to properly hold all of
the unshare flags.

So to make the code consistent, this patch updates the code to use
unsigned long instead of int for the clone flags in those places
where we get it wrong today.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mnt_namespace.h | 2 +-
 include/linux/nsproxy.h       | 2 +-
 include/linux/pid_namespace.h | 2 +-
 include/linux/utsname.h       | 3 ++-
 4 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 1fa4d9813b31..8eed44f8ca73 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -14,7 +14,7 @@ struct mnt_namespace {
 	int event;
 };
 
-extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *,
+extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void __put_mnt_ns(struct mnt_namespace *ns);
 
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 6d179a397bfb..ce06188b7a56 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -32,7 +32,7 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
-int copy_namespaces(int flags, struct task_struct *tsk);
+int copy_namespaces(unsigned long flags, struct task_struct *tsk);
 void get_task_namespaces(struct task_struct *tsk);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 169c6c24209b..b9a17e08ff0f 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -29,7 +29,7 @@ static inline void get_pid_ns(struct pid_namespace *ns)
 	kref_get(&ns->kref);
 }
 
-extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
 extern void free_pid_ns(struct kref *kref);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 51ad167611e4..923db99175f2 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -48,7 +48,8 @@ static inline void get_uts_ns(struct uts_namespace *ns)
 	kref_get(&ns->kref);
 }
 
-extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns);
+extern struct uts_namespace *copy_utsname(unsigned long flags,
+					struct uts_namespace *ns);
 extern void free_uts_ns(struct kref *kref);
 
 static inline void put_uts_ns(struct uts_namespace *ns)
-- 
cgit v1.2.3


From 132e4b0a049c39337c535501561b8301c7f2b202 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 15 Jul 2007 23:41:19 -0700
Subject: cdrom: replace hard-coded constants by kernel.h macro.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cdrom.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index f50f04bdbc16..2b641b176e7f 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -414,8 +414,8 @@ struct cdrom_generic_command
 #define CDO_CHECK_TYPE		0x10    /* check type on open for data */
 
 /* Special codes used when specifying changer slots. */
-#define CDSL_NONE       	((int) (~0U>>1)-1)
-#define CDSL_CURRENT    	((int) (~0U>>1))
+#define CDSL_NONE       	(INT_MAX-1)
+#define CDSL_CURRENT    	INT_MAX
 
 /* For partition based multisession access. IDE can handle 64 partitions
  * per drive - SCSI CD-ROM's use minors to differentiate between the
-- 
cgit v1.2.3


From 1d9d02feeee89e9132034d504c9a45eeaf618a3d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@cpushare.com>
Date: Sun, 15 Jul 2007 23:41:32 -0700
Subject: move seccomp from /proc to a prctl

This reduces the memory footprint and it enforces that only the current
task can enable seccomp on itself (this is a requirement for a
straightforward [modulo preempt ;) ] TIF_NOTSC implementation).

Signed-off-by: Andrea Arcangeli <andrea@cpushare.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h   |  4 ++++
 include/linux/seccomp.h | 15 +++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 52a9be41250d..e2eff9079fe9 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -59,4 +59,8 @@
 # define PR_ENDIAN_LITTLE	1	/* True little endian mode */
 # define PR_ENDIAN_PPC_LITTLE	2	/* "PowerPC" pseudo little endian */
 
+/* Get/set process seccomp mode */
+#define PR_GET_SECCOMP	21
+#define PR_SET_SECCOMP	22
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 3e8b1cf54303..d708974dbfe3 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -4,8 +4,6 @@
 
 #ifdef CONFIG_SECCOMP
 
-#define NR_SECCOMP_MODES 1
-
 #include <linux/thread_info.h>
 #include <asm/seccomp.h>
 
@@ -23,6 +21,9 @@ static inline int has_secure_computing(struct thread_info *ti)
 	return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP));
 }
 
+extern long prctl_get_seccomp(void);
+extern long prctl_set_seccomp(unsigned long);
+
 #else /* CONFIG_SECCOMP */
 
 typedef struct { } seccomp_t;
@@ -34,6 +35,16 @@ static inline int has_secure_computing(struct thread_info *ti)
 	return 0;
 }
 
+static inline long prctl_get_seccomp(void)
+{
+	return -EINVAL;
+}
+
+static inline long prctl_set_seccomp(unsigned long arg2)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_SECCOMP */
 
 #endif /* _LINUX_SECCOMP_H */
-- 
cgit v1.2.3


From cf99abace7e07dd8491e7093a9a9ef11d48838ed Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@cpushare.com>
Date: Sun, 15 Jul 2007 23:41:33 -0700
Subject: make seccomp zerocost in schedule

This follows a suggestion from Chuck Ebbert on how to make seccomp
absolutely zerocost in schedule too.  The only remaining footprint of
seccomp is in terms of the bzImage size that becomes a few bytes (perhaps
even a few kbytes) larger, measure it if you care in the embedded.

Signed-off-by: Andrea Arcangeli <andrea@cpushare.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/seccomp.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index d708974dbfe3..262a8dccfa81 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -16,11 +16,6 @@ static inline void secure_computing(int this_syscall)
 		__secure_computing(this_syscall);
 }
 
-static inline int has_secure_computing(struct thread_info *ti)
-{
-	return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP));
-}
-
 extern long prctl_get_seccomp(void);
 extern long prctl_set_seccomp(unsigned long);
 
@@ -29,11 +24,6 @@ extern long prctl_set_seccomp(unsigned long);
 typedef struct { } seccomp_t;
 
 #define secure_computing(x) do { } while (0)
-/* static inline to preserve typechecking */
-static inline int has_secure_computing(struct thread_info *ti)
-{
-	return 0;
-}
 
 static inline long prctl_get_seccomp(void)
 {
-- 
cgit v1.2.3


From cc2ea416b2aa04d0c34ff2281a23dae5b76b7b3b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 15 Jul 2007 23:41:38 -0700
Subject: uninline check_signature()

This is a rather bizarre thing to have inlined in io.h.  Stick it in lib/
instead.

While we're there, despaghetti it a bit, and fix its off-by-one behaviour when
passed a zero length.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/io.h | 29 ++---------------------------
 1 file changed, 2 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
index 8423dd376514..e3b2dda6c8eb 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -63,32 +63,7 @@ void __iomem * devm_ioremap(struct device *dev, unsigned long offset,
 void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset,
 				    unsigned long size);
 void devm_iounmap(struct device *dev, void __iomem *addr);
-
-/**
- *	check_signature		-	find BIOS signatures
- *	@io_addr: mmio address to check
- *	@signature:  signature block
- *	@length: length of signature
- *
- *	Perform a signature comparison with the mmio address io_addr. This
- *	address should have been obtained by ioremap.
- *	Returns 1 on a match.
- */
-
-static inline int check_signature(const volatile void __iomem *io_addr,
-	const unsigned char *signature, int length)
-{
-	int retval = 0;
-	do {
-		if (readb(io_addr) != *signature)
-			goto out;
-		io_addr++;
-		signature++;
-		length--;
-	} while (length);
-	retval = 1;
-out:
-	return retval;
-}
+int check_signature(const volatile void __iomem *io_addr,
+			const unsigned char *signature, int length);
 
 #endif /* _LINUX_IO_H */
-- 
cgit v1.2.3


From 608e2619682e951f525b08e7a48669a3c0263b41 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sun, 15 Jul 2007 23:41:39 -0700
Subject: generic bug: use show_regs() instead of dump_stack()

The current generic bug implementation has a call to dump_stack() in case a
WARN_ON(whatever) gets hit.  Since report_bug(), which calls dump_stack(),
gets called from an exception handler we can do better: just pass the
pt_regs structure to report_bug() and pass it to show_regs() in case of a
warning.  This will give more debug information like register contents,
etc...  In addition this avoids some pointless lines that dump_stack()
emits, since it includes a stack backtrace of the exception handler which
is of no interest in case of a warning.  E.g.  on s390 the following lines
are currently always present in a stack backtrace if dump_stack() gets
called from report_bug():

 [<000000000001517a>] show_trace+0x92/0xe8)
 [<0000000000015270>] show_stack+0xa0/0xd0
 [<00000000000152ce>] dump_stack+0x2e/0x3c
 [<0000000000195450>] report_bug+0x98/0xf8
 [<0000000000016cc8>] illegal_op+0x1fc/0x21c
 [<00000000000227d6>] sysc_return+0x0/0x10

Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 42aa0a54b6f4..54398d2c6d8d 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -10,6 +10,8 @@ enum bug_trap_type {
 	BUG_TRAP_TYPE_BUG = 2,
 };
 
+struct pt_regs;
+
 #ifdef CONFIG_GENERIC_BUG
 #include <asm-generic/bug.h>
 
@@ -20,7 +22,7 @@ static inline int is_warning_bug(const struct bug_entry *bug)
 
 const struct bug_entry *find_bug(unsigned long bugaddr);
 
-enum bug_trap_type report_bug(unsigned long bug_addr);
+enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs);
 
 int  module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
 			 struct module *);
@@ -31,7 +33,8 @@ int is_valid_bugaddr(unsigned long addr);
 
 #else	/* !CONFIG_GENERIC_BUG */
 
-static inline enum bug_trap_type report_bug(unsigned long bug_addr)
+static inline enum bug_trap_type report_bug(unsigned long bug_addr,
+					    struct pt_regs *regs)
 {
 	return BUG_TRAP_TYPE_BUG;
 }
-- 
cgit v1.2.3


From d52988023a37720e9e4aeb66362be67fa21d8836 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Sun, 15 Jul 2007 23:41:42 -0700
Subject: Remove the last few UMSDOS leftovers

The UMSDOS filesystem was removed back in 2.6.11, but some tiny bits stuck
around.  This patch removes the few remaining leftovers.  The only things
left behind after this are the entries in the CREDITS file and the ioctl
number in Documentation/ioctl-number.txt as documentation.

This third (hopefully final) version of the patch doesn't edit the
arch/um/config.release file, since Jeff Dike pointed out to me that it
should die completely, and asked me to remove it from my patch as he'll
send in a separate patch removing the file completely.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ncp_fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index 83e39eb054d3..88766e43e121 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -148,8 +148,6 @@ struct ncp_nls_ioctl
 #include <linux/ncp_fs_i.h>
 #include <linux/ncp_fs_sb.h>
 
-/* undef because public define in umsdos_fs.h (ncp_fs.h isn't public) */
-#undef PRINTK
 /* define because it is easy to change PRINTK to {*}PRINTK */
 #define PRINTK(format, args...) printk(KERN_DEBUG format , ## args)
 
-- 
cgit v1.2.3


From f5a421a4509a7e2dff11da0f01b0548f4f84d503 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 15 Jul 2007 23:41:44 -0700
Subject: rename cancel_rearming_delayed_work() to cancel_delayed_work_sync()

Imho, the current naming of cancel_xxx workqueue functions is very confusing.

	cancel_delayed_work()
	cancel_rearming_delayed_work()
	cancel_rearming_delayed_workqueue()	// obsolete

	cancel_work_sync()

This looks as if the first 2 functions differ in "type" of their argument
which is not true any longer, nowadays the difference is the behaviour.

The semantics of cancel_rearming_delayed_work(dwork) was changed
significantly, it doesn't require that dwork rearms itself, and cancels dwork
synchronously.

Rename it to cancel_delayed_work_sync().  This matches cancel_delayed_work()
and cancel_work_sync().  Re-create cancel_rearming_delayed_work() as a simple
inline obsolete wrapper, like cancel_rearming_delayed_workqueue().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ce0719a2cfeb..5c89ac6e7f55 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -166,14 +166,21 @@ static inline int cancel_delayed_work(struct delayed_work *work)
 	return ret;
 }
 
-extern void cancel_rearming_delayed_work(struct delayed_work *work);
+extern void cancel_delayed_work_sync(struct delayed_work *work);
 
-/* Obsolete. use cancel_rearming_delayed_work() */
+/* Obsolete. use cancel_delayed_work_sync() */
 static inline
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 					struct delayed_work *work)
 {
-	cancel_rearming_delayed_work(work);
+	cancel_delayed_work_sync(work);
+}
+
+/* Obsolete. use cancel_delayed_work_sync() */
+static inline
+void cancel_rearming_delayed_work(struct delayed_work *work)
+{
+	cancel_delayed_work_sync(work);
 }
 
 #endif
-- 
cgit v1.2.3


From 1f1f642e2f092e37eb9038060eb0100c44f55a11 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 15 Jul 2007 23:41:44 -0700
Subject: make cancel_xxx_work_sync() return a boolean

Change cancel_work_sync() and cancel_delayed_work_sync() to return a boolean
indicating whether the work was actually cancelled.  A zero return value means
that the work was not pending/queued.

Without that kind of change it is not possible to avoid flush_workqueue()
sometimes, see the next patch as an example.

Also, this patch unifies both functions and kills the (unlikely) busy-wait
loop.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 5c89ac6e7f55..ce6badc98f6d 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -148,7 +148,7 @@ extern int keventd_up(void);
 extern void init_workqueues(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
-extern void cancel_work_sync(struct work_struct *work);
+extern int cancel_work_sync(struct work_struct *work);
 
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
@@ -166,7 +166,7 @@ static inline int cancel_delayed_work(struct delayed_work *work)
 	return ret;
 }
 
-extern void cancel_delayed_work_sync(struct delayed_work *work);
+extern int cancel_delayed_work_sync(struct delayed_work *work);
 
 /* Obsolete. use cancel_delayed_work_sync() */
 static inline
-- 
cgit v1.2.3


From 8f8a68ee486e1c81eaead3c521822bf86142d380 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 15 Jul 2007 23:41:52 -0700
Subject: remove mm/backing-dev.c:congestion_wait_interruptible()

congestion_wait_interruptible() is no longer used.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f2542c24b328..7011d6255593 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -93,7 +93,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
 void set_bdi_congested(struct backing_dev_info *bdi, int rw);
 long congestion_wait(int rw, long timeout);
-long congestion_wait_interruptible(int rw, long timeout);
 void congestion_end(int rw);
 
 #define bdi_cap_writeback_dirty(bdi) \
-- 
cgit v1.2.3


From 2235219b7721b8e74de6841e79240936561a2b63 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 15 Jul 2007 23:41:58 -0700
Subject: ext2: statfs speed up

This is a patch that speeds up statfs.  It is very simple - the "overhead"
calculation, which takes a huge amount of time for large filesystems, never
changes unless the size of the filesystem itself changes.  That means we can
store it in memory and only recalculate if the filesystem has been resized
(almost never).

It also fixes a minor problem that we never update the on-disk superblock free
blocks/inodes counts until the filesystem is unmounted.  While not fatal, we
may as well update that on disk when we have the information, and it makes
things like debugfs and dumpe2fs report a bit more accurate info.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs_sb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index 4eda0ed76a48..d149f2959e67 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -33,6 +33,8 @@ struct ext2_sb_info {
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
 	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
 	unsigned long s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext2_super_block * s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head ** s_group_desc;
-- 
cgit v1.2.3


From a71ce8c6c9bf269b192f352ea555217815cf027e Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 15 Jul 2007 23:41:59 -0700
Subject: ext3: statfs speed up

This is a patch that speeds up statfs.  It is very simple - the "overhead"
calculation, which takes a huge amount of time for large filesystems, never
changes unless the size of the filesystem itself changes.  That means we can
store it in memory and only recalculate if the filesystem has been resized
(almost never).

It also fixes a minor problem that we never update the on-disk superblock free
blocks/inodes counts until the filesystem is unmounted.  While not fatal, we
may as well update that on disk when we have the information, and it makes
things like debugfs and dumpe2fs report a bit more accurate info.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext3_fs_sb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index f61309c81cc4..d3c08353edf6 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -38,6 +38,8 @@ struct ext3_sb_info {
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
 	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
 	unsigned long s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext3_super_block * s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head ** s_group_desc;
-- 
cgit v1.2.3


From 5e70030d4cf91613530a23b40ad9919bb9ee114f Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 15 Jul 2007 23:42:00 -0700
Subject: ext4: statfs speed up

This is a patch that speeds up statfs.  It is very simple - the "overhead"
calculation, which takes a huge amount of time for large filesystems, never
changes unless the size of the filesystem itself changes.  That means we can
store it in memory and only recalculate if the filesystem has been resized
(almost never).

It also fixes a minor problem that we never update the on-disk superblock free
blocks/inodes counts until the filesystem is unmounted.  While not fatal, we
may as well update that on disk when we have the information, and it makes
things like debugfs and dumpe2fs report a bit more accurate info.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext4_fs_sb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 691a713139ce..2347557a327a 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -39,6 +39,8 @@ struct ext4_sb_info {
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
 	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
 	unsigned long s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block * s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head ** s_group_desc;
-- 
cgit v1.2.3


From 2e27afb300b56d83bb03fbfa68852b9c1e2920c6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.linux-foundation.org>
Date: Mon, 16 Jul 2007 14:31:08 -0700
Subject: Revert "[NET]: Fix races in net_rx_action vs netpoll."

This reverts commit 29578624e354f56143d92510fff33a8b2aaa2c03.

Ingo Molnar reports complete breakage with his e1000 card (no
networking, card reports transmit timeouts), and bisected it down to
this commit.  Let's figure out what went wrong, but not keep breaking
machines until we do.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Olaf Kirch <olaf.kirch@oracle.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/netdevice.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 322b5eae57dd..da7a13c97eb8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -262,8 +262,6 @@ enum netdev_state_t
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
 	__LINK_STATE_QDISC_RUNNING,
-	/* Set by the netpoll NAPI code */
-	__LINK_STATE_POLL_LIST_FROZEN,
 };
 
 
@@ -1022,14 +1020,6 @@ static inline void netif_rx_complete(struct net_device *dev)
 {
 	unsigned long flags;
 
-#ifdef CONFIG_NETPOLL
-	/* Prevent race with netpoll - yes, this is a kludge.
-	 * But at least it doesn't penalize the non-netpoll
-	 * code path. */
-	if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state))
-		return;
-#endif
-
 	local_irq_save(flags);
 	__netif_rx_complete(dev);
 	local_irq_restore(flags);
-- 
cgit v1.2.3


From 13bd59a111760bb7cba8dcf17b6b55a0d99d3592 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Tue, 17 Jul 2007 14:18:47 +0200
Subject: Don't define empty struct bsg_class_device if !CONFIG_BLK_DEV_BSG

Don't define an empty struct bsg_class_device if !CONFIG_BLK_DEV_BSG.

It's embedded in struct request_queue, but there we have

#if defined(CONFIG_BLK_DEV_BSG)
	struct bsg_class_device bsg_dev;
#endif

anyway.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bsg.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index bd998ca6cb2e..8547b10c388b 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -60,7 +60,6 @@ struct bsg_class_device {
 extern int bsg_register_queue(struct request_queue *, const char *);
 extern void bsg_unregister_queue(struct request_queue *);
 #else
-struct bsg_class_device { };
 #define bsg_register_queue(disk, name)		(0)
 #define bsg_unregister_queue(disk)	do { } while (0)
 #endif
-- 
cgit v1.2.3


From 769848c03895b63e5662eb7e4ec8c4866f7d0183 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 17 Jul 2007 04:03:05 -0700
Subject: Add __GFP_MOVABLE for callers to flag allocations from high memory
 that may be migrated

It is often known at allocation time whether a page may be migrated or not.
This patch adds a flag called __GFP_MOVABLE and a new mask called
GFP_HIGHUSER_MOVABLE.  Allocations using __GFP_MOVABLE can be either
migrated using the page migration mechanism or reclaimed by syncing with
backing storage and discarding.

An API function very similar to alloc_zeroed_user_highpage() is added for
__GFP_MOVABLE allocations called alloc_zeroed_user_highpage_movable().  The
flags used by alloc_zeroed_user_highpage() are not changed because it would
change the semantics of an existing API.  After this patch is applied there
are no in-kernel users of alloc_zeroed_user_highpage() so it probably should
be marked deprecated if this patch is merged.

Note that this patch includes a minor cleanup to the use of __GFP_ZERO in
shmem.c to keep all flag modifications to inode->mapping in the
shmem_dir_alloc() helper function.  This clean-up suggestion is courtesy of
Hugh Dickins.

Additional credit goes to Christoph Lameter and Linus Torvalds for shaping the
concept.  Credit to Hugh Dickins for catching issues with shmem swap vector
and ramfs allocations.

[akpm@linux-foundation.org: build fix]
[hugh@veritas.com: __GFP_ZERO cleanup]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h     | 16 +++++++++++++++-
 include/linux/highmem.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0d2ef0b082a6..e5882fe49f83 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,9 @@ struct vm_area_struct;
  * cannot handle allocation failures.
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ *
+ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
+ * mechanism or reclaimed
  */
 #define __GFP_WAIT	((__force gfp_t)0x10u)	/* Can wait and reschedule? */
 #define __GFP_HIGH	((__force gfp_t)0x20u)	/* Should access emergency pools? */
@@ -45,6 +48,7 @@ struct vm_area_struct;
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_MOVABLE	((__force gfp_t)0x80000u) /* Page is movable */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -53,7 +57,8 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \
+			__GFP_MOVABLE)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
@@ -65,6 +70,15 @@ struct vm_area_struct;
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
+				 __GFP_HARDWALL | __GFP_HIGHMEM | \
+				 __GFP_MOVABLE)
+#define GFP_NOFS_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
+#define GFP_USER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
+				 __GFP_HARDWALL | __GFP_MOVABLE)
+#define GFP_HIGHUSER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
+				 __GFP_HARDWALL | __GFP_HIGHMEM | \
+				 __GFP_MOVABLE)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 98e2cce996a4..12c5e4e3135a 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -73,10 +73,27 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
 }
 
 #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+/**
+ * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
+ * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA but the caller is expected
+ * to specify via movableflags whether the page will be movable in the
+ * future or not
+ *
+ * An architecture may override this function by defining
+ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own
+ * implementation.
+ */
 static inline struct page *
-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+__alloc_zeroed_user_highpage(gfp_t movableflags,
+			struct vm_area_struct *vma,
+			unsigned long vaddr)
 {
-	struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
+	struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
+			vma, vaddr);
 
 	if (page)
 		clear_user_highpage(page, vaddr);
@@ -85,6 +102,36 @@ alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
 }
 #endif
 
+/**
+ * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA that the caller knows will
+ * not be able to move in the future using move_pages() or reclaim. If it
+ * is known that the page can move, use alloc_zeroed_user_highpage_movable
+ */
+static inline struct page *
+alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+{
+	return __alloc_zeroed_user_highpage(0, vma, vaddr);
+}
+
+/**
+ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA that the caller knows will
+ * be able to migrate in the future using move_pages() or reclaimed
+ */
+static inline struct page *
+alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+					unsigned long vaddr)
+{
+	return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
+}
+
 static inline void clear_highpage(struct page *page)
 {
 	void *kaddr = kmap_atomic(page, KM_USER0);
-- 
cgit v1.2.3


From 2a1e274acf0b1c192face19a4be7c12d4503eaaf Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 17 Jul 2007 04:03:12 -0700
Subject: Create the ZONE_MOVABLE zone

The following 8 patches against 2.6.20-mm2 create a zone called ZONE_MOVABLE
that is only usable by allocations that specify both __GFP_HIGHMEM and
__GFP_MOVABLE.  This has the effect of keeping all non-movable pages within a
single memory partition while allowing movable allocations to be satisfied
from either partition.  The patches may be applied with the list-based
anti-fragmentation patches that groups pages together based on mobility.

The size of the zone is determined by a kernelcore= parameter specified at
boot-time.  This specifies how much memory is usable by non-movable
allocations and the remainder is used for ZONE_MOVABLE.  Any range of pages
within ZONE_MOVABLE can be released by migrating the pages or by reclaiming.

When selecting a zone to take pages from for ZONE_MOVABLE, there are two
things to consider.  First, only memory from the highest populated zone is
used for ZONE_MOVABLE.  On the x86, this is probably going to be ZONE_HIGHMEM
but it would be ZONE_DMA on ppc64 or possibly ZONE_DMA32 on x86_64.  Second,
the amount of memory usable by the kernel will be spread evenly throughout
NUMA nodes where possible.  If the nodes are not of equal size, the amount of
memory usable by the kernel on some nodes may be greater than others.

By default, the zone is not as useful for hugetlb allocations because they are
pinned and non-migratable (currently at least).  A sysctl is provided that
allows huge pages to be allocated from that zone.  This means that the huge
page pool can be resized to the size of ZONE_MOVABLE during the lifetime of
the system assuming that pages are not mlocked.  Despite huge pages being
non-movable, we do not introduce additional external fragmentation of note as
huge pages are always the largest contiguous block we care about.

Credit goes to Andy Whitcroft for catching a large variety of problems during
review of the patches.

This patch creates an additional zone, ZONE_MOVABLE.  This zone is only usable
by allocations which specify both __GFP_HIGHMEM and __GFP_MOVABLE.  Hot-added
memory continues to be placed in their existing destination as there is no
mechanism to redirect them to a specific zone.

[y-goto@jp.fujitsu.com: Fix section mismatch of memory hotplug related code]
[akpm@linux-foundation.org: various fixes]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    |  3 +++
 include/linux/mm.h     |  1 +
 include/linux/mmzone.h | 20 ++++++++++++++++++--
 include/linux/vmstat.h |  5 +++--
 4 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e5882fe49f83..bc68dd9a6d41 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -106,6 +106,9 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 	if (flags & __GFP_DMA32)
 		return ZONE_DMA32;
 #endif
+	if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
+			(__GFP_HIGHMEM | __GFP_MOVABLE))
+		return ZONE_MOVABLE;
 #ifdef CONFIG_HIGHMEM
 	if (flags & __GFP_HIGHMEM)
 		return ZONE_HIGHMEM;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 97d0cddfd223..857e44817178 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1005,6 +1005,7 @@ extern unsigned long find_max_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
+extern int cmdline_parse_kernelcore(char *p);
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 04b1636a970b..d71ff763c9df 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -146,6 +146,7 @@ enum zone_type {
 	 */
 	ZONE_HIGHMEM,
 #endif
+	ZONE_MOVABLE,
 	MAX_NR_ZONES
 };
 
@@ -167,6 +168,7 @@ enum zone_type {
 	+ defined(CONFIG_ZONE_DMA32)	\
 	+ 1				\
 	+ defined(CONFIG_HIGHMEM)	\
+	+ 1				\
 )
 #if __ZONE_COUNT < 2
 #define ZONES_SHIFT 0
@@ -499,10 +501,22 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
+extern int movable_zone;
+
+static inline int zone_movable_is_highmem(void)
+{
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP)
+	return movable_zone == ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
+}
+
 static inline int is_highmem_idx(enum zone_type idx)
 {
 #ifdef CONFIG_HIGHMEM
-	return (idx == ZONE_HIGHMEM);
+	return (idx == ZONE_HIGHMEM ||
+		(idx == ZONE_MOVABLE && zone_movable_is_highmem()));
 #else
 	return 0;
 #endif
@@ -522,7 +536,9 @@ static inline int is_normal_idx(enum zone_type idx)
 static inline int is_highmem(struct zone *zone)
 {
 #ifdef CONFIG_HIGHMEM
-	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+	int zone_idx = zone - zone->zone_pgdat->node_zones;
+	return zone_idx == ZONE_HIGHMEM ||
+		(zone_idx == ZONE_MOVABLE && zone_movable_is_highmem());
 #else
 	return 0;
 #endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d9325cf8a134..75370ec0923e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -25,7 +25,7 @@
 #define HIGHMEM_ZONE(xx)
 #endif
 
-#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
+#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE
 
 enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGALLOC),
@@ -170,7 +170,8 @@ static inline unsigned long node_page_state(int node,
 #ifdef CONFIG_HIGHMEM
 		zone_page_state(&zones[ZONE_HIGHMEM], item) +
 #endif
-		zone_page_state(&zones[ZONE_NORMAL], item);
+		zone_page_state(&zones[ZONE_NORMAL], item) +
+		zone_page_state(&zones[ZONE_MOVABLE], item);
 }
 
 extern void zone_statistics(struct zonelist *, struct zone *);
-- 
cgit v1.2.3


From 396faf0303d273219db5d7eb4a2879ad977ed185 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 17 Jul 2007 04:03:13 -0700
Subject: Allow huge page allocations to use GFP_HIGH_MOVABLE

Huge pages are not movable so are not allocated from ZONE_MOVABLE.  However,
as ZONE_MOVABLE will always have pages that can be migrated or reclaimed, it
can be used to satisfy hugepage allocations even when the system has been
running a long time.  This allows an administrator to resize the hugepage pool
at runtime depending on the size of ZONE_MOVABLE.

This patch adds a new sysctl called hugepages_treat_as_movable.  When a
non-zero value is written to it, future allocations for the huge page pool
will use ZONE_MOVABLE.  Despite huge pages being non-movable, we do not
introduce additional external fragmentation of note as huge pages are always
the largest contiguous block we care about.

[akpm@linux-foundation.org: various fixes]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h   | 2 ++
 include/linux/mempolicy.h | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2c13715e9dde..49b7053043ad 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -15,6 +15,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
 void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
@@ -29,6 +30,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
+extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index daabb3aa1ec6..e147cf50529f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr);
+		unsigned long addr, gfp_t gfp_flags);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -256,9 +256,9 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr)
+		unsigned long addr, gfp_t gfp_flags)
 {
-	return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+	return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
 }
 
 static inline int do_migrate_pages(struct mm_struct *mm,
-- 
cgit v1.2.3


From ed7ed365172e27b0efe9d43cc962723c7193e34e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 17 Jul 2007 04:03:14 -0700
Subject: handle kernelcore=: generic

This patch adds the kernelcore= parameter for x86.

Once all patches are applied, a new command-line parameter and a new
sysctl exist.  This patch adds the necessary documentation.

From: Yasunori Goto <y-goto@jp.fujitsu.com>

  When "kernelcore" boot option is specified, kernel can't boot up on ia64
  because of an infinite loop.  In addition, the parsing code can be handled
  in an architecture-independent manner.

  This patch uses common code to handle the kernelcore= parameter.  It is
  only available to architectures that support arch-independent zone-sizing
  (i.e.  define CONFIG_ARCH_POPULATES_NODE_MAP).  Other architectures will
  ignore the boot parameter.

[bunk@stusta.de: make cmdline_parse_kernelcore() static]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 857e44817178..97d0cddfd223 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1005,7 +1005,6 @@ extern unsigned long find_max_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
-extern int cmdline_parse_kernelcore(char *p);
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
-- 
cgit v1.2.3


From 5ad333eb66ff1e52a87639822ae088577669dcf9 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Tue, 17 Jul 2007 04:03:16 -0700
Subject: Lumpy Reclaim V4

When we are out of memory of a suitable size we enter reclaim.  The current
reclaim algorithm targets pages in LRU order, which is great for fairness at
order-0 but highly unsuitable if you desire pages at higher orders.  To get
pages of higher order we must shoot down a very high proportion of memory;
>95% in a lot of cases.

This patch set adds a lumpy reclaim algorithm to the allocator.  It targets
groups of pages at the specified order anchored at the end of the active and
inactive lists.  This encourages groups of pages at the requested orders to
move from active to inactive, and active to free lists.  This behaviour is
only triggered out of direct reclaim when higher order pages have been
requested.

This patch set is particularly effective when utilised with an
anti-fragmentation scheme which groups pages of similar reclaimability
together.

This patch set is based on Peter Zijlstra's lumpy reclaim V2 patch which forms
the foundation.  Credit to Mel Gorman for sanity checking.

Mel said:

  The patches have an application with hugepage pool resizing.

  When lumpy-reclaim is used with ZONE_MOVABLE, the hugepages pool can
  be resized with greater reliability.  Testing on a desktop machine with 2GB
  of RAM showed that growing the hugepage pool with ZONE_MOVABLE on its own
  was very slow as the success rate was quite low.  Without lumpy-reclaim,
  each attempt to grow the pool by 100 pages would yield 1 or 2 hugepages.
  With lumpy-reclaim, getting 40 to 70 hugepages on each attempt was typical.

[akpm@osdl.org: ia64 pfn_to_nid fixes and loop cleanup]
[bunk@stusta.de: static declarations for internal functions]
[a.p.zijlstra@chello.nl: initial lumpy V2 implementation]
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Bob Picco <bob.picco@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 8 ++++++++
 include/linux/swap.h   | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d71ff763c9df..da8eb8ad9e9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -24,6 +24,14 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+/*
+ * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
+ * costly to service.  That is between allocation orders which should
+ * coelesce naturally under reasonable reclaim pressure and those which
+ * will not.
+ */
+#define PAGE_ALLOC_COSTLY_ORDER 3
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 006868881346..665f85f2a3af 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -188,7 +188,8 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **zones, int order,
+					gfp_t gfp_mask);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
-- 
cgit v1.2.3


From 8e1f936b73150f5095448a0fee6d4f30a1f9001d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 17 Jul 2007 04:03:17 -0700
Subject: mm: clean up and kernelify shrinker registration

I can never remember what the function to register to receive VM pressure
is called.  I have to trace down from __alloc_pages() to find it.

It's called "set_shrinker()", and it needs Your Help.

1) Don't hide struct shrinker.  It contains no magic.
2) Don't allocate "struct shrinker".  It's not helpful.
3) Call them "register_shrinker" and "unregister_shrinker".
4) Call the function "shrink" not "shrinker".
5) Reduce the 17 lines of waffly comments to 13, but document it properly.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: David Chinner <dgc@sgi.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 97d0cddfd223..4c482a3ee870 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -810,27 +810,31 @@ extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long flags, unsigned long new_addr);
 
 /*
- * Prototype to add a shrinker callback for ageable caches.
- * 
- * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
- * scan `nr_to_scan' objects, attempting to free them.
+ * A callback you can register to apply pressure to ageable caches.
  *
- * The callback must return the number of objects which remain in the cache.
+ * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'.  It should
+ * look through the least-recently-used 'nr_to_scan' entries and
+ * attempt to free them up.  It should return the number of objects
+ * which remain in the cache.  If it returns -1, it means it cannot do
+ * any scanning at this time (eg. there is a risk of deadlock).
  *
- * The callback will be passed nr_to_scan == 0 when the VM is querying the
- * cache size, so a fastpath for that case is appropriate.
- */
-typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-
-/*
- * Add an aging callback.  The int is the number of 'seeks' it takes
- * to recreate one of the objects that these functions age.
+ * The 'gfpmask' refers to the allocation we are currently trying to
+ * fulfil.
+ *
+ * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
+ * querying the cache size, so a fastpath for that case is appropriate.
  */
+struct shrinker {
+	int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+	int seeks;	/* seeks to recreate an obj */
 
-#define DEFAULT_SEEKS 2
-struct shrinker;
-extern struct shrinker *set_shrinker(int, shrinker_t);
-extern void remove_shrinker(struct shrinker *shrinker);
+	/* These are for internal use */
+	struct list_head list;
+	long nr;	/* objs pending delete */
+};
+#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
+extern void register_shrinker(struct shrinker *);
+extern void unregister_shrinker(struct shrinker *);
 
 /*
  * Some shared mappigns will want the pages marked read-only
-- 
cgit v1.2.3


From 6cb8f91320d3e720351c21741da795fed580b21b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 17 Jul 2007 04:03:22 -0700
Subject: Slab allocators: consistent ZERO_SIZE_PTR support and NULL result
 semantics

Define ZERO_OR_NULL_PTR macro to be able to remove the checks from the
allocators.  Move ZERO_SIZE_PTR related stuff into slab.h.

Make ZERO_SIZE_PTR work for all slab allocators and get rid of the
WARN_ON_ONCE(size == 0) that is still remaining in SLAB.

Make slub return NULL like the other allocators if a too large memory segment
is requested via __kmalloc.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 13 +++++++++++++
 include/linux/slab_def.h | 12 ++++++++++++
 include/linux/slub_def.h | 12 ------------
 3 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 27402fea9b79..0289ec89300a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -30,6 +30,19 @@
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
 
+/*
+ * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
+ *
+ * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
+ *
+ * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
+ * Both make kfree a no-op.
+ */
+#define ZERO_SIZE_PTR ((void *)16)
+
+#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) < \
+				(unsigned long)ZERO_SIZE_PTR)
+
 /*
  * struct kmem_cache related prototypes
  */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 365d036c454a..16e814ffab8d 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -32,6 +32,10 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
+
+		if (!size)
+			return ZERO_SIZE_PTR;
+
 #define CACHE(x) \
 		if (size <= x) \
 			goto found; \
@@ -58,6 +62,10 @@ static inline void *kzalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
+
+		if (!size)
+			return ZERO_SIZE_PTR;
+
 #define CACHE(x) \
 		if (size <= x) \
 			goto found; \
@@ -88,6 +96,10 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
+
+		if (!size)
+			return ZERO_SIZE_PTR;
+
 #define CACHE(x) \
 		if (size <= x) \
 			goto found; \
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a582f6771525..579b0a22858e 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -159,18 +159,6 @@ static inline struct kmem_cache *kmalloc_slab(size_t size)
 #define SLUB_DMA 0
 #endif
 
-
-/*
- * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
- *
- * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
- *
- * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
- * Both make kfree a no-op.
- */
-#define ZERO_SIZE_PTR ((void *)16)
-
-
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
-- 
cgit v1.2.3


From 0c710013200e72b5e0bc680ff4ec6bdac53c5ce8 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 17 Jul 2007 04:03:24 -0700
Subject: SLUB: add some more inlines and #ifdef CONFIG_SLUB_DEBUG

Add #ifdefs around data structures only needed if debugging is compiled into
SLUB.

Add inlines to small functions to reduce code size.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 579b0a22858e..bae11111458f 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -16,7 +16,9 @@ struct kmem_cache_node {
 	unsigned long nr_partial;
 	atomic_long_t nr_slabs;
 	struct list_head partial;
+#ifdef CONFIG_SLUB_DEBUG
 	struct list_head full;
+#endif
 };
 
 /*
@@ -44,7 +46,9 @@ struct kmem_cache {
 	int align;		/* Alignment */
 	const char *name;	/* Name (only for display!) */
 	struct list_head list;	/* List of slab caches */
+#ifdef CONFIG_SLUB_DEBUG
 	struct kobject kobj;	/* For sysfs */
+#endif
 
 #ifdef CONFIG_NUMA
 	int defrag_ratio;
-- 
cgit v1.2.3


From 81cda6626178cd55297831296ba8ecedbfd8b52d Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 17 Jul 2007 04:03:29 -0700
Subject: Slab allocators: Cleanup zeroing allocations

It now becomes easy to support the zeroing allocs with generic inline
functions in slab.h.  Provide inline definitions to allow the continued use of
kzalloc, kmem_cache_zalloc etc but remove other definitions of zeroing
functions from the slab allocators and util.c.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 77 +++++++++++++++++++++++++++++-------------------
 include/linux/slab_def.h | 30 -------------------
 include/linux/slub_def.h | 13 --------
 3 files changed, 46 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0289ec89300a..0e1d0daef6a2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -55,7 +55,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *, struct kmem_cache *, unsigned long));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
-void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 void kmem_cache_free(struct kmem_cache *, void *);
 unsigned int kmem_cache_size(struct kmem_cache *);
 const char *kmem_cache_name(struct kmem_cache *);
@@ -91,11 +90,37 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 /*
  * Common kmalloc functions provided by all allocators
  */
-void *__kzalloc(size_t, gfp_t);
 void * __must_check krealloc(const void *, size_t, gfp_t);
 void kfree(const void *);
 size_t ksize(const void *);
 
+/*
+ * Allocator specific definitions. These are mainly used to establish optimized
+ * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
+ * selecting the appropriate general cache at compile time.
+ *
+ * Allocators must define at least:
+ *
+ *	kmem_cache_alloc()
+ *	__kmalloc()
+ *	kmalloc()
+ *
+ * Those wishing to support NUMA must also define:
+ *
+ *	kmem_cache_alloc_node()
+ *	kmalloc_node()
+ *
+ * See each allocator definition file for additional comments and
+ * implementation notes.
+ */
+#ifdef CONFIG_SLUB
+#include <linux/slub_def.h>
+#elif defined(CONFIG_SLOB)
+#include <linux/slob_def.h>
+#else
+#include <linux/slab_def.h>
+#endif
+
 /**
  * kcalloc - allocate memory for an array. The memory is set to zero.
  * @n: number of elements.
@@ -151,37 +176,9 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
 	if (n != 0 && size > ULONG_MAX / n)
 		return NULL;
-	return __kzalloc(n * size, flags);
+	return __kmalloc(n * size, flags | __GFP_ZERO);
 }
 
-/*
- * Allocator specific definitions. These are mainly used to establish optimized
- * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
- * selecting the appropriate general cache at compile time.
- *
- * Allocators must define at least:
- *
- *	kmem_cache_alloc()
- *	__kmalloc()
- *	kmalloc()
- *	kzalloc()
- *
- * Those wishing to support NUMA must also define:
- *
- *	kmem_cache_alloc_node()
- *	kmalloc_node()
- *
- * See each allocator definition file for additional comments and
- * implementation notes.
- */
-#ifdef CONFIG_SLUB
-#include <linux/slub_def.h>
-#elif defined(CONFIG_SLOB)
-#include <linux/slob_def.h>
-#else
-#include <linux/slab_def.h>
-#endif
-
 #if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
 /**
  * kmalloc_node - allocate memory from a specific node
@@ -255,5 +252,23 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
 
 #endif /* DEBUG_SLAB */
 
+/*
+ * Shortcuts
+ */
+static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
+{
+	return kmem_cache_alloc(k, flags | __GFP_ZERO);
+}
+
+/**
+ * kzalloc - allocate memory. The memory is set to zero.
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kmalloc).
+ */
+static inline void *kzalloc(size_t size, gfp_t flags)
+{
+	return kmalloc(size, flags | __GFP_ZERO);
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 16e814ffab8d..32bdc2ffd715 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -58,36 +58,6 @@ found:
 	return __kmalloc(size, flags);
 }
 
-static inline void *kzalloc(size_t size, gfp_t flags)
-{
-	if (__builtin_constant_p(size)) {
-		int i = 0;
-
-		if (!size)
-			return ZERO_SIZE_PTR;
-
-#define CACHE(x) \
-		if (size <= x) \
-			goto found; \
-		else \
-			i++;
-#include "kmalloc_sizes.h"
-#undef CACHE
-		{
-			extern void __you_cannot_kzalloc_that_much(void);
-			__you_cannot_kzalloc_that_much();
-		}
-found:
-#ifdef CONFIG_ZONE_DMA
-		if (flags & GFP_DMA)
-			return kmem_cache_zalloc(malloc_sizes[i].cs_dmacachep,
-						flags);
-#endif
-		return kmem_cache_zalloc(malloc_sizes[i].cs_cachep, flags);
-	}
-	return __kzalloc(size, flags);
-}
-
 #ifdef CONFIG_NUMA
 extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
 extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index bae11111458f..07f7e4cbcee3 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -179,19 +179,6 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 		return __kmalloc(size, flags);
 }
 
-static inline void *kzalloc(size_t size, gfp_t flags)
-{
-	if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) {
-		struct kmem_cache *s = kmalloc_slab(size);
-
-		if (!s)
-			return ZERO_SIZE_PTR;
-
-		return kmem_cache_zalloc(s, flags);
-	} else
-		return __kzalloc(size, flags);
-}
-
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-- 
cgit v1.2.3


From b5fab14e5d87df4d94161ae5f5e0c8625f9ffda2 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 17 Jul 2007 04:03:33 -0700
Subject: Add VM_BUG_ON in case someone uses page_mapping on a slab page

Detect slab objects being passed to the page oriented functions of the VM.

It is not sufficient to simply return NULL because the functions calling
page_mapping may also depend on other items of the page_struct being set up
properly.  Moreover, slab objects may not be properly aligned.  The page
oriented functions of the VM expect to operate on page aligned, page sized
objects.  Operations on objects straddling page boundaries may affect those
objects only partially, which may lead to surprising results.

It is better to detect any remaining uses and eliminate them.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4c482a3ee870..a5c451816fdc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -599,6 +599,7 @@ static inline struct address_space *page_mapping(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	VM_BUG_ON(PageSlab(page));
 	if (unlikely(PageSwapCache(page)))
 		mapping = &swapper_space;
 #ifdef CONFIG_SLUB
-- 
cgit v1.2.3


From 831441862956fffa17b9801db37e6ea1650b0f69 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 17 Jul 2007 04:03:35 -0700
Subject: Freezer: make kernel threads nonfreezable by default

Currently, the freezer treats all tasks as freezable, except for the kernel
threads that explicitly set the PF_NOFREEZE flag for themselves.  This
approach is problematic, since it requires every kernel thread to either
set PF_NOFREEZE explicitly, or call try_to_freeze(), even if it doesn't
care for the freezing of tasks at all.

It seems better to only require the kernel threads that want to or need to
be frozen to use some freezer-related code and to remove any
freezer-related code from the other (nonfreezable) kernel threads, which is
done in this patch.

The patch causes all kernel threads to be nonfreezable by default (ie.  to
have PF_NOFREEZE set by default) and introduces the set_freezable()
function that should be called by the freezable kernel threads in order to
unset PF_NOFREEZE.  It also makes all of the currently freezable kernel
threads call set_freezable(), so it shouldn't cause any (intentional)
change of behaviour to appear.  Additionally, it updates documentation to
describe the freezing of tasks more accurately.

[akpm@linux-foundation.org: build fixes]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/freezer.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 4631086f5060..2d38b1a74662 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -1,5 +1,8 @@
 /* Freezer declarations */
 
+#ifndef FREEZER_H_INCLUDED
+#define FREEZER_H_INCLUDED
+
 #include <linux/sched.h>
 
 #ifdef CONFIG_PM
@@ -115,6 +118,14 @@ static inline int freezer_should_skip(struct task_struct *p)
 	return !!(p->flags & PF_FREEZER_SKIP);
 }
 
+/*
+ * Tell the freezer that the current task should be frozen by it
+ */
+static inline void set_freezable(void)
+{
+	current->flags &= ~PF_NOFREEZE;
+}
+
 #else
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
@@ -130,4 +141,7 @@ static inline int try_to_freeze(void) { return 0; }
 static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
 static inline int freezer_should_skip(struct task_struct *p) { return 0; }
+static inline void set_freezable(void) {}
 #endif
+
+#endif	/* FREEZER_H_INCLUDED */
-- 
cgit v1.2.3


From 2a7326b5bbafac4c96bcdb944b2a773593030b96 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 17 Jul 2007 04:03:37 -0700
Subject: CONFIG_BOUNCE to avoid useless inclusion of bounce buffer logic

The bounce buffer logic is included on systems that do not need it.  If a
system does not have zones like ZONE_DMA and ZONE_HIGHMEM that can lead to
the use of bounce buffers then there is no need to reserve memory pools etc
etc.  This is true f.e.  for SGI Altix.

Also nicifies the Makefile and gets rid of the tricky "and" there.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b32564a1e105..f78965fc6426 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -624,7 +624,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
  */
 #define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
 
-#ifdef CONFIG_MMU
+#ifdef CONFIG_BOUNCE
 extern int init_emergency_isa_pool(void);
 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio);
 #else
-- 
cgit v1.2.3


From bcdcd8e725b923ad7c0de809680d5d5658a7bf8c Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Tue, 17 Jul 2007 04:03:42 -0700
Subject: Report that kernel is tainted if there was an OOPS

If the kernel OOPSed or BUGed then it probably should be considered as
tainted.  Thus, all subsequent OOPSes and SysRq dumps will report the
tainted kernel.  This saves a lot of time explaining oddities in the
calltraces.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Added parisc patch from Matthew Wilson  -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7a4852505914..1eb9cde550c4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -210,6 +210,7 @@ extern enum system_states {
 #define TAINT_MACHINE_CHECK		(1<<4)
 #define TAINT_BAD_PAGE			(1<<5)
 #define TAINT_USER			(1<<6)
+#define TAINT_DIE			(1<<7)
 
 extern void dump_stack(void);
 
-- 
cgit v1.2.3


From 7664732315c97f48dba9d1e7339ad16fc5a320ac Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 17 Jul 2007 04:03:43 -0700
Subject: PTRACE_PEEKDATA consolidation

Identical implementations of PTRACE_PEEKDATA go into generic_ptrace_peekdata()
function.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eeb1976ef7bf..477cc8ed6bcb 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -110,6 +110,7 @@ static inline void ptrace_unlink(struct task_struct *child)
 		__ptrace_unlink(child);
 }
 
+int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data);
 
 #ifndef force_successful_syscall_return
 /*
-- 
cgit v1.2.3


From f284ce7269031947326bac6bb19a977705276222 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 17 Jul 2007 04:03:44 -0700
Subject: PTRACE_POKEDATA consolidation

Identical implementations of PTRACE_POKEDATA go into generic_ptrace_pokedata()
function.

AFAICS, fix bug on xtensa where successful PTRACE_POKEDATA will nevertheless
return EPERM.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 477cc8ed6bcb..ae8146abd746 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -111,6 +111,7 @@ static inline void ptrace_unlink(struct task_struct *child)
 }
 
 int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data);
+int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
 
 #ifndef force_successful_syscall_return
 /*
-- 
cgit v1.2.3


From 62239ac2b301abc397e70986649666cfb7835907 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 17 Jul 2007 04:03:45 -0700
Subject: proper prototype for proc_nr_files()

Add a proper prototype for proc_nr_files() in include/linux/fs.h

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e68780810279..b3a9f0db9d80 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -283,6 +283,7 @@ extern int dir_notify_enable;
 #include <linux/init.h>
 #include <linux/pid.h>
 #include <linux/mutex.h>
+#include <linux/sysctl.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -2050,5 +2051,9 @@ static inline void free_secdata(void *secdata)
 { }
 #endif	/* CONFIG_SECURITY */
 
+int proc_nr_files(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos);
+
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From f4480240f700587c15507b7815e75989b16825b2 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 17 Jul 2007 04:03:47 -0700
Subject: unregister_blkdev(): return void

Put WARN_ON and fixed all callers of unregister_blkdev().  Now we can make
unregister_blkdev return void.

Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3a9f0db9d80..aa74f7de9dcd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1527,7 +1527,7 @@ extern void putname(const char *name);
 
 #ifdef CONFIG_BLOCK
 extern int register_blkdev(unsigned int, const char *);
-extern int unregister_blkdev(unsigned int, const char *);
+extern void unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
 extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
-- 
cgit v1.2.3


From 77293034696e3e0b6c8b8fc1f96be091104b3d2b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 17 Jul 2007 04:03:49 -0700
Subject: Remove OPEN_MAX

The OPEN_MAX macro in limits.h should not be there.  It claims to be the
limit on file descriptors in a process, but its value is wrong for that.
There is no constant value, but a variable resource limit (RLIMIT_NOFILE).
Nothing in the kernel uses OPEN_MAX except things that are wrong to do so.
I've submitted other patches to remove those uses.

The proper thing to do according to POSIX is not to define OPEN_MAX at all.
The sysconf (_SC_OPEN_MAX) implementation works by calling getrlimit.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/limits.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/limits.h b/include/linux/limits.h
index eaf2e099f125..c4b4e579c01d 100644
--- a/include/linux/limits.h
+++ b/include/linux/limits.h
@@ -6,7 +6,6 @@
 #define NGROUPS_MAX    65536	/* supplemental group IDs are available */
 #define ARG_MAX       131072	/* # bytes of args + environ for exec() */
 #define CHILD_MAX        999    /* no limit :-) */
-#define OPEN_MAX         256	/* # open files a process may have */
 #define LINK_MAX         127	/* # links a file may have */
 #define MAX_CANON        255	/* size of the canonical input queue */
 #define MAX_INPUT        255	/* size of the type-ahead buffer */
-- 
cgit v1.2.3


From f9e86f419073605b4520848021cc042963c227c7 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 17 Jul 2007 04:03:49 -0700
Subject: Remove CHILD_MAX

The CHILD_MAX macro in limits.h should not be there.  It claims to be the
limit on processes a user can own, but its value is wrong for that.
There is no constant value, but a variable resource limit (RLIMIT_NPROC).
Nothing in the kernel uses CHILD_MAX.

The proper thing to do according to POSIX is not to define CHILD_MAX at all.
The sysconf (_SC_CHILD_MAX) implementation works by calling getrlimit.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/limits.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/limits.h b/include/linux/limits.h
index c4b4e579c01d..2d0f94162fb3 100644
--- a/include/linux/limits.h
+++ b/include/linux/limits.h
@@ -5,7 +5,6 @@
 
 #define NGROUPS_MAX    65536	/* supplemental group IDs are available */
 #define ARG_MAX       131072	/* # bytes of args + environ for exec() */
-#define CHILD_MAX        999    /* no limit :-) */
 #define LINK_MAX         127	/* # links a file may have */
 #define MAX_CANON        255	/* size of the canonical input queue */
 #define MAX_INPUT        255	/* size of the type-ahead buffer */
-- 
cgit v1.2.3


From b45d52797432bd6b5d9786dbda940eb8d0b9ed06 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Tue, 17 Jul 2007 04:03:50 -0700
Subject: sb1250-duart.c: SB1250 DUART serial support

This is a driver for the SB1250 DUART, a dual serial port implementation
included in the Broadcom family of SOCs descending from the SiByte SB1250
MIPS64 chip multiprocessor.  It is a new implementation replacing the
old-fashioned driver currently present in the linux-mips.org tree.  It
supports all the usual features one would expect from a(n asynchronous)
serial driver, including modem line control (as far as hardware supports it
-- there is edge detection logic missing from the DCD and RI lines and the
driver does not implement polling of these lines at the moment), the serial
console, BREAK transmission and reception, including the magic SysRq.  The
receive FIFO threshold is not maintained though.

The driver was tested with a SWARM board which uses a BCM1250 SOC (which is
dual MIPS64 CMP) and has both ports of the single DUART implemented wired
externally.  Both were tested.  Testing included using the ports as
terminal lines at 1200bps (which is the ports' minimum), 115200bps and a
couple of random speeds in between.  The modem lines were verified to
operate correctly.  No testing was performed with a use as a network
interface, like with SLIP or PPP.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 7f2c99d66e9d..9c721cd2c9d6 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -142,6 +142,9 @@
 /* Micrel KS8695 */
 #define PORT_KS8695	76
 
+/* Broadcom SB1250, etc. SOC */
+#define PORT_SB1250_DUART	77
+
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From 9281acea6a3687ff0f262e0be31eac34895b95d7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 17 Jul 2007 04:03:51 -0700
Subject: kallsyms: make KSYM_NAME_LEN include space for trailing '\0'

KSYM_NAME_LEN is peculiar in that it does not include the space for the
trailing '\0', forcing all users to use KSYM_NAME_LEN + 1 when allocating
a buffer.  This is nonsense and error-prone.  Moreover, when the caller
forgets that, it's very likely to subtly bite back by corrupting the stack
because the last position of the buffer is always cleared to zero.

This patch increments KSYM_NAME_LEN by one and updates code accordingly.

* off-by-one bug in asm-powerpc/kprobes.h::kprobe_lookup_name() macro
  is fixed.

* Where MODULE_NAME_LEN and KSYM_NAME_LEN were used together,
  MODULE_NAME_LEN was treated as if it didn't include space for the
  trailing '\0'.  Fix it.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Paulo Marques <pmarques@grupopie.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 5f06527dca21..f73de6fb5c68 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -7,9 +7,9 @@
 
 #include <linux/errno.h>
 
-#define KSYM_NAME_LEN 127
-#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +	\
-			 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1)
+#define KSYM_NAME_LEN 128
+#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \
+			 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1)
 
 #ifdef CONFIG_KALLSYMS
 /* Lookup the address for a symbol. Returns 0 if not found. */
-- 
cgit v1.2.3


From 5b78cc9ac8602baafebb75a09025ffb17d1aebc2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Tue, 17 Jul 2007 04:03:53 -0700
Subject: make timespec_equal() take const arguments

Make arguments of timespec_equal() const struct timespec.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 4bb05a829be9..ec3b0ced0afe 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -36,7 +36,8 @@ struct timezone {
 #define NSEC_PER_SEC	1000000000L
 #define FSEC_PER_SEC	1000000000000000L
 
-static inline int timespec_equal(struct timespec *a, struct timespec *b)
+static inline int timespec_equal(const struct timespec *a,
+                                 const struct timespec *b)
 {
 	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
 }
-- 
cgit v1.2.3


From dccd573bb02aa011a4a7146c02c409ac0bd722a0 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 17 Jul 2007 04:04:02 -0700
Subject: SPI controller drivers: check for unsupported modes

Minor SPI controller driver updates: make the setup() methods reject
spi->mode bits they don't support, by masking against the inverse of bits
they *do* support.  This insures against misbehavior later when new mode
bits get added.

Most controllers can't support SPI_LSB_FIRST; more handle SPI_CS_HIGH.
Support for all four SPI clock/transfer modes is routine.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/spi_bitbang.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 9dbca629dcfb..b8db32cea1de 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -26,6 +26,7 @@ struct spi_bitbang {
 	struct list_head	queue;
 	u8			busy;
 	u8			use_dma;
+	u8			flags;		/* extra spi->mode support */
 
 	struct spi_master	*master;
 
-- 
cgit v1.2.3


From c06e677aed0c86480b01faa894967daa8aa3568a Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 17 Jul 2007 04:04:03 -0700
Subject: SPI: add 3wire mode flag

Add a new spi->mode bit: SPI_3WIRE, for chips where the SI and SO signals
are shared (and which are thus only half duplex).  Update the LM70 driver
to require support for that hardware mode from the controller.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/spi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 1be5ea059477..302b81d1d117 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -76,6 +76,7 @@ struct spi_device {
 #define	SPI_MODE_3	(SPI_CPOL|SPI_CPHA)
 #define	SPI_CS_HIGH	0x04			/* chipselect active high? */
 #define	SPI_LSB_FIRST	0x08			/* per-word bits-on-wire */
+#define	SPI_3WIRE	0x10			/* SI/SO signals shared */
 	u8			bits_per_word;
 	int			irq;
 	void			*controller_state;
-- 
cgit v1.2.3


From ad241528c4919505afccb022acbab3eeb0db4d80 Mon Sep 17 00:00:00 2001
From: Jan Nikitenko <jan.nikitenko@gmail.com>
Date: Tue, 17 Jul 2007 04:04:03 -0700
Subject: CRC7 support

Add CRC7 routines, used for example in MMC over SPI communication.
Kerneldoc updates

[akpm@linux-foundation.org: fix funny mix of const and non-const]
Signed-off-by: Jan Nikitenko <jan.nikitenko@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crc7.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/linux/crc7.h

(limited to 'include/linux')

diff --git a/include/linux/crc7.h b/include/linux/crc7.h
new file mode 100644
index 000000000000..1786e772d5c6
--- /dev/null
+++ b/include/linux/crc7.h
@@ -0,0 +1,14 @@
+#ifndef _LINUX_CRC7_H
+#define _LINUX_CRC7_H
+#include <linux/types.h>
+
+extern const u8 crc7_syndrome_table[256];
+
+static inline u8 crc7_byte(u8 crc, u8 data)
+{
+	return crc7_syndrome_table[(crc << 1) ^ data];
+}
+
+extern u8 crc7(u8 crc, const u8 *buffer, size_t len);
+
+#endif
-- 
cgit v1.2.3


From 447aef1a19135a69bfd725c33f7e753740cb8447 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@trinity.fluff.org>
Date: Tue, 17 Jul 2007 04:04:10 -0700
Subject: SPI: tle620x power switch driver

Add support for the Infineon TLE62x0 series of low-side driver chips, such
as the TLE6220 or TLE6230.  These can be viewed as output GPIOs specialized
for power switching applications.  The driver provides a userspace
interface to those GPIOs, and to the switch status they provide.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/tle62x0.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 include/linux/spi/tle62x0.h

(limited to 'include/linux')

diff --git a/include/linux/spi/tle62x0.h b/include/linux/spi/tle62x0.h
new file mode 100644
index 000000000000..60b59187e590
--- /dev/null
+++ b/include/linux/spi/tle62x0.h
@@ -0,0 +1,24 @@
+/*
+ * tle62x0.h - platform glue to Infineon TLE62x0 driver chips
+ *
+ * Copyright 2007 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+struct tle62x0_pdata {
+	unsigned int		init_state;
+	unsigned int		gpio_count;
+};
-- 
cgit v1.2.3


From f29ba280ecb46331c1f6842b094808af01131422 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Tue, 17 Jul 2007 04:04:12 -0700
Subject: spi_mpc83xx.c: support QE enabled 83xx CPU's like mpc832x

QUICC Engine enabled mpc83xx CPUs have a somewhat different HW interface to
the SPI controller.  This patch adds a qe_mode knob that sees to it that the
needed adaptations are performed.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fsl_devices.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 12e631f0fb77..695741b0e420 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -112,7 +112,7 @@ struct fsl_usb2_platform_data {
 struct fsl_spi_platform_data {
 	u32 	initial_spmode;	/* initial SPMODE value */
 	u16	bus_num;
-
+	bool	qe_mode;
 	/* board specific information */
 	u16	max_chipselect;
 	void	(*activate_cs)(u8 cs, u8 polarity);
-- 
cgit v1.2.3


From 67837f232d6d55be99d6e0dec4ea9bb8112840cd Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Tue, 17 Jul 2007 04:04:16 -0700
Subject: Use mutex instead of semaphore in CAPI 2.0 driver

The CAPI 2.0 driver uses a semaphore as mutex.  Use the mutex API instead of
the (binary) semaphore.

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Acked-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernelcapi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h
index aea34e74c496..8c4350a9ed87 100644
--- a/include/linux/kernelcapi.h
+++ b/include/linux/kernelcapi.h
@@ -64,7 +64,7 @@ struct capi20_appl {
 	unsigned long nrecvdatapkt;
 	unsigned long nsentctlpkt;
 	unsigned long nsentdatapkt;
-	struct semaphore recv_sem;
+	struct mutex recv_mtx;
 	struct sk_buff_head recv_queue;
 	struct work_struct recv_work;
 	int release_in_progress;
-- 
cgit v1.2.3


From a569425512253992cc64ebf8b6d00a62f986db3e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 17 Jul 2007 04:04:28 -0700
Subject: knfsd: exportfs: add exportfs.h header

Currently the export_operations structure and the helpers related to it are in
fs.h.  fs.h is already far too large and there are very few places needing the
export bits, so split them off into a separate header.

[akpm@linux-foundation.org: fix cifs build]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: Steven French <sfrench@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/exportfs.h | 119 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h       | 114 +--------------------------------------------
 2 files changed, 120 insertions(+), 113 deletions(-)
 create mode 100644 include/linux/exportfs.h

(limited to 'include/linux')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
new file mode 100644
index 000000000000..fdc306fbba50
--- /dev/null
+++ b/include/linux/exportfs.h
@@ -0,0 +1,119 @@
+#ifndef LINUX_EXPORTFS_H
+#define LINUX_EXPORTFS_H 1
+
+#include <linux/types.h>
+
+struct dentry;
+struct super_block;
+
+
+/**
+ * struct export_operations - for nfsd to communicate with file systems
+ * @decode_fh:      decode a file handle fragment and return a &struct dentry
+ * @encode_fh:      encode a file handle fragment from a dentry
+ * @get_name:       find the name for a given inode in a given directory
+ * @get_parent:     find the parent of a given directory
+ * @get_dentry:     find a dentry for the inode given a file handle sub-fragment
+ * @find_exported_dentry:
+ *	set by the exporting module to a standard helper function.
+ *
+ * Description:
+ *    The export_operations structure provides a means for nfsd to communicate
+ *    with a particular exported file system  - particularly enabling nfsd and
+ *    the filesystem to co-operate when dealing with file handles.
+ *
+ *    export_operations contains two basic operation for dealing with file
+ *    handles, decode_fh() and encode_fh(), and allows for some other
+ *    operations to be defined which standard helper routines use to get
+ *    specific information from the filesystem.
+ *
+ *    nfsd encodes information use to determine which filesystem a filehandle
+ *    applies to in the initial part of the file handle.  The remainder, termed
+ *    a file handle fragment, is controlled completely by the filesystem.  The
+ *    standard helper routines assume that this fragment will contain one or
+ *    two sub-fragments, one which identifies the file, and one which may be
+ *    used to identify the (a) directory containing the file.
+ *
+ *    In some situations, nfsd needs to get a dentry which is connected into a
+ *    specific part of the file tree.  To allow for this, it passes the
+ *    function acceptable() together with a @context which can be used to see
+ *    if the dentry is acceptable.  As there can be multiple dentrys for a
+ *    given file, the filesystem should check each one for acceptability before
+ *    looking for the next.  As soon as an acceptable one is found, it should
+ *    be returned.
+ *
+ * decode_fh:
+ *    @decode_fh is given a &struct super_block (@sb), a file handle fragment
+ *    (@fh, @fh_len) and an acceptability testing function (@acceptable,
+ *    @context).  It should return a &struct dentry which refers to the same
+ *    file that the file handle fragment refers to,  and which passes the
+ *    acceptability test.  If it cannot, it should return a %NULL pointer if
+ *    the file was found but no acceptable &dentries were available, or a
+ *    %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or
+ *    %ENOMEM).
+ *
+ * encode_fh:
+ *    @encode_fh should store in the file handle fragment @fh (using at most
+ *    @max_len bytes) information that can be used by @decode_fh to recover the
+ *    file refered to by the &struct dentry @de.  If the @connectable flag is
+ *    set, the encode_fh() should store sufficient information so that a good
+ *    attempt can be made to find not only the file but also it's place in the
+ *    filesystem.   This typically means storing a reference to de->d_parent in
+ *    the filehandle fragment.  encode_fh() should return the number of bytes
+ *    stored or a negative error code such as %-ENOSPC
+ *
+ * get_name:
+ *    @get_name should find a name for the given @child in the given @parent
+ *    directory.  The name should be stored in the @name (with the
+ *    understanding that it is already pointing to a a %NAME_MAX+1 sized
+ *    buffer.   get_name() should return %0 on success, a negative error code
+ *    or error.  @get_name will be called without @parent->i_mutex held.
+ *
+ * get_parent:
+ *    @get_parent should find the parent directory for the given @child which
+ *    is also a directory.  In the event that it cannot be found, or storage
+ *    space cannot be allocated, a %ERR_PTR should be returned.
+ *
+ * get_dentry:
+ *    Given a &super_block (@sb) and a pointer to a file-system specific inode
+ *    identifier, possibly an inode number, (@inump) get_dentry() should find
+ *    the identified inode and return a dentry for that inode.  Any suitable
+ *    dentry can be returned including, if necessary, a new dentry created with
+ *    d_alloc_root.  The caller can then find any other extant dentrys by
+ *    following the d_alias links.  If a new dentry was created using
+ *    d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry
+ *    should be d_rehash()ed.
+ *
+ *    If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code
+ *    can be returned.  The @inump will be whatever was passed to
+ *    nfsd_find_fh_dentry() in either the @obj or @parent parameters.
+ *
+ * Locking rules:
+ *    get_parent is called with child->d_inode->i_mutex down
+ *    get_name is not (which is possibly inconsistent)
+ */
+
+struct export_operations {
+	struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh,
+			int fh_len, int fh_type,
+			int (*acceptable)(void *context, struct dentry *de),
+			void *context);
+	int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
+			int connectable);
+	int (*get_name)(struct dentry *parent, char *name,
+			struct dentry *child);
+	struct dentry * (*get_parent)(struct dentry *child);
+	struct dentry * (*get_dentry)(struct super_block *sb, void *inump);
+
+	/* This is set by the exporting module to a standard helper */
+	struct dentry * (*find_exported_dentry)(
+			struct super_block *sb, void *obj, void *parent,
+			int (*acceptable)(void *context, struct dentry *de),
+			void *context);
+};
+
+extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj,
+	void *parent, int (*acceptable)(void *context, struct dentry *de),
+	void *context);
+
+#endif /* LINUX_EXPORTFS_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa74f7de9dcd..58ce336d4a6b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -289,6 +289,7 @@ extern int dir_notify_enable;
 #include <asm/semaphore.h>
 #include <asm/byteorder.h>
 
+struct export_operations;
 struct hd_geometry;
 struct iovec;
 struct nameidata;
@@ -1278,119 +1279,6 @@ static inline void file_accessed(struct file *file)
 
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
 
-/**
- * struct export_operations - for nfsd to communicate with file systems
- * @decode_fh:      decode a file handle fragment and return a &struct dentry
- * @encode_fh:      encode a file handle fragment from a dentry
- * @get_name:       find the name for a given inode in a given directory
- * @get_parent:     find the parent of a given directory
- * @get_dentry:     find a dentry for the inode given a file handle sub-fragment
- * @find_exported_dentry:
- *	set by the exporting module to a standard helper function.
- *
- * Description:
- *    The export_operations structure provides a means for nfsd to communicate
- *    with a particular exported file system  - particularly enabling nfsd and
- *    the filesystem to co-operate when dealing with file handles.
- *
- *    export_operations contains two basic operation for dealing with file
- *    handles, decode_fh() and encode_fh(), and allows for some other
- *    operations to be defined which standard helper routines use to get
- *    specific information from the filesystem.
- *
- *    nfsd encodes information use to determine which filesystem a filehandle
- *    applies to in the initial part of the file handle.  The remainder, termed
- *    a file handle fragment, is controlled completely by the filesystem.  The
- *    standard helper routines assume that this fragment will contain one or
- *    two sub-fragments, one which identifies the file, and one which may be
- *    used to identify the (a) directory containing the file.
- *
- *    In some situations, nfsd needs to get a dentry which is connected into a
- *    specific part of the file tree.  To allow for this, it passes the
- *    function acceptable() together with a @context which can be used to see
- *    if the dentry is acceptable.  As there can be multiple dentrys for a
- *    given file, the filesystem should check each one for acceptability before
- *    looking for the next.  As soon as an acceptable one is found, it should
- *    be returned.
- *
- * decode_fh:
- *    @decode_fh is given a &struct super_block (@sb), a file handle fragment
- *    (@fh, @fh_len) and an acceptability testing function (@acceptable,
- *    @context).  It should return a &struct dentry which refers to the same
- *    file that the file handle fragment refers to,  and which passes the
- *    acceptability test.  If it cannot, it should return a %NULL pointer if
- *    the file was found but no acceptable &dentries were available, or a
- *    %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or
- *    %ENOMEM).
- *
- * encode_fh:
- *    @encode_fh should store in the file handle fragment @fh (using at most
- *    @max_len bytes) information that can be used by @decode_fh to recover the
- *    file refered to by the &struct dentry @de.  If the @connectable flag is
- *    set, the encode_fh() should store sufficient information so that a good
- *    attempt can be made to find not only the file but also it's place in the
- *    filesystem.   This typically means storing a reference to de->d_parent in
- *    the filehandle fragment.  encode_fh() should return the number of bytes
- *    stored or a negative error code such as %-ENOSPC
- *
- * get_name:
- *    @get_name should find a name for the given @child in the given @parent
- *    directory.  The name should be stored in the @name (with the
- *    understanding that it is already pointing to a a %NAME_MAX+1 sized
- *    buffer.   get_name() should return %0 on success, a negative error code
- *    or error.  @get_name will be called without @parent->i_mutex held.
- *
- * get_parent:
- *    @get_parent should find the parent directory for the given @child which
- *    is also a directory.  In the event that it cannot be found, or storage
- *    space cannot be allocated, a %ERR_PTR should be returned.
- *
- * get_dentry:
- *    Given a &super_block (@sb) and a pointer to a file-system specific inode
- *    identifier, possibly an inode number, (@inump) get_dentry() should find
- *    the identified inode and return a dentry for that inode.  Any suitable
- *    dentry can be returned including, if necessary, a new dentry created with
- *    d_alloc_root.  The caller can then find any other extant dentrys by
- *    following the d_alias links.  If a new dentry was created using
- *    d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry
- *    should be d_rehash()ed.
- *
- *    If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code
- *    can be returned.  The @inump will be whatever was passed to
- *    nfsd_find_fh_dentry() in either the @obj or @parent parameters.
- *
- * Locking rules:
- *    get_parent is called with child->d_inode->i_mutex down
- *    get_name is not (which is possibly inconsistent)
- */
-
-struct export_operations {
-	struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, int fh_len, int fh_type,
-			 int (*acceptable)(void *context, struct dentry *de),
-			 void *context);
-	int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
-			 int connectable);
-
-	/* the following are only called from the filesystem itself */
-	int (*get_name)(struct dentry *parent, char *name,
-			struct dentry *child);
-	struct dentry * (*get_parent)(struct dentry *child);
-	struct dentry * (*get_dentry)(struct super_block *sb, void *inump);
-
-	/* This is set by the exporting module to a standard helper */
-	struct dentry * (*find_exported_dentry)(
-		struct super_block *sb, void *obj, void *parent,
-		int (*acceptable)(void *context, struct dentry *de),
-		void *context);
-
-
-};
-
-extern struct dentry *
-find_exported_dentry(struct super_block *sb, void *obj, void *parent,
-		     int (*acceptable)(void *context, struct dentry *de),
-		     void *context);
-
 struct file_system_type {
 	const char *name;
 	int fs_flags;
-- 
cgit v1.2.3


From 5ca29607331fe37980dc3b488793ef8b1409b722 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 17 Jul 2007 04:04:29 -0700
Subject: knfsd: exportfs: remove iget abuse

When the exportfs interface was added the expectation was that filesystems
provide an operation to convert from a file handle to an inode/dentry, but it
kept a backwards compat option that still calls into iget.

Calling into iget from non-filesystem code is very bad, because it gives too
little information to the filesystem, and simply crashes if the filesystem
doesn't implement the ->read_inode routine.

Fortunately there are only two filesystems left using this fallback: efs and
jfs.  This patch moves a copy of export_iget to each of those to implement the
get_dentry method.

While this is a temporary increase of lines of code in the kernel it allows
for a much cleaner interface and important code restructuring in later
patches.

[akpm@linux-foundation.org: add jfs_get_inode_flags() declaration]
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/efs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h
index dfed8009ebff..16cb25cbf7c5 100644
--- a/include/linux/efs_fs.h
+++ b/include/linux/efs_fs.h
@@ -45,6 +45,7 @@ extern efs_block_t efs_map_block(struct inode *, efs_block_t);
 extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp);
 extern struct dentry *efs_get_parent(struct dentry *);
 extern int efs_bmap(struct inode *, int);
 
-- 
cgit v1.2.3


From d37065cd6d6bbe98fd4be14d6c9e64c0bfa124c5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 17 Jul 2007 04:04:30 -0700
Subject: knfsd: exportfs: add procedural interface for NFSD

Currently NFSD calls directly into filesystems through the export_operations
structure.  I plan to change this interface in various ways in later patches,
and want to avoid the export of the default operations to NFSD, so this patch
adds two simple exportfs_encode_fh/exportfs_decode_fh helpers for NFSD to call
instead of poking into exportfs guts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/exportfs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fdc306fbba50..8872fe8392d6 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -5,6 +5,7 @@
 
 struct dentry;
 struct super_block;
+struct vfsmount;
 
 
 /**
@@ -116,4 +117,10 @@ extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj,
 	void *parent, int (*acceptable)(void *context, struct dentry *de),
 	void *context);
 
+extern int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
+	int connectable);
+extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh,
+	int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *),
+	void *context);
+
 #endif /* LINUX_EXPORTFS_H */
-- 
cgit v1.2.3


From 9a8db97e7756119689c93c431e8b8324080f5625 Mon Sep 17 00:00:00 2001
From: Marc Eshel <eshel@almaden.ibm.com>
Date: Tue, 17 Jul 2007 04:04:35 -0700
Subject: knfsd: lockd: nfsd4: use same grace period for lockd and nfsd4

Both lockd and (in the nfsv4 case) nfsd enforce a "grace period" after reboot,
during which clients may reclaim locks from the previous server instance, but
may not acquire new locks.

Currently the lockd and nfsd enforce grace periods of different lengths.  This
may cause problems when we reboot a server with both v2/v3 and v4 clients.
For example, if the lockd grace period is shorter (as is likely the case),
then a v3 client might acquire a new lock that conflicts with a lock already
held (but not yet reclaimed) by a v4 client.

This patch calculates a lease time that lockd and nfsd can both use.

Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockd/bind.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 246de1d84a26..6f1637c61e10 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -27,6 +27,7 @@ struct nlmsvc_binding {
 						struct nfs_fh *,
 						struct file **);
 	void			(*fclose)(struct file *);
+	unsigned long		(*get_grace_period)(void);
 };
 
 extern struct nlmsvc_binding *	nlmsvc_ops;
@@ -38,4 +39,12 @@ extern int	nlmclnt_proc(struct inode *, int, struct file_lock *);
 extern int	lockd_up(int proto);
 extern void	lockd_down(void);
 
+unsigned long get_nfs_grace_period(void);
+
+#ifdef CONFIG_NFSD_V4
+unsigned long get_nfs4_grace_period(void);
+#else
+static inline unsigned long get_nfs4_grace_period(void) {return 0;}
+#endif
+
 #endif /* LINUX_LOCKD_BIND_H */
-- 
cgit v1.2.3


From 33a1060ae7dc671a0208b341bd454009625bb5a6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 17 Jul 2007 04:04:35 -0700
Subject: knfsd: nfsd4: fix NFSv4 filehandle size units confusion

NFS4_FHSIZE is measured in bytes, not 4-byte words, so much more space than
necessary is being allocated for struct nfs4_cb_recall.

I should have wondered why this structure was so much larger than it needed to
be!

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index ab5c236bd9a7..732de9cad4a8 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -67,7 +67,7 @@ struct nfs4_cb_recall {
 	int			cbr_trunc;
 	stateid_t		cbr_stateid;
 	u32			cbr_fhlen;
-	u32			cbr_fhval[NFS4_FHSIZE];
+	char			cbr_fhval[NFS4_FHSIZE];
 	struct nfs4_delegation	*cbr_dp;
 };
 
-- 
cgit v1.2.3


From 1e5140279f31e47d58ed6036ee61ba7a65710e63 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 17 Jul 2007 04:04:38 -0700
Subject: knfsd: nfsd: remove unused header interface.h

It looks like Al Viro gutted this header file five years ago and it hasn't
been touched since.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/interface.h | 13 -------------
 include/linux/nfsd/nfsd.h      |  1 -
 2 files changed, 14 deletions(-)
 delete mode 100644 include/linux/nfsd/interface.h

(limited to 'include/linux')

diff --git a/include/linux/nfsd/interface.h b/include/linux/nfsd/interface.h
deleted file mode 100644
index af0979704afb..000000000000
--- a/include/linux/nfsd/interface.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * include/linux/nfsd/interface.h
- *
- * defines interface between nfsd and other bits of
- * the kernel.  Particularly filesystems (eventually).
- *
- * Copyright (C) 2000 Neil Brown <neilb@cse.unsw.edu.au>
- */
-
-#ifndef LINUX_NFSD_INTERFACE_H
-#define LINUX_NFSD_INTERFACE_H
-
-#endif /* LINUX_NFSD_INTERFACE_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 72feac581aa3..0d8420497765 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -22,7 +22,6 @@
 #include <linux/nfsd/export.h>
 #include <linux/nfsd/auth.h>
 #include <linux/nfsd/stats.h>
-#include <linux/nfsd/interface.h>
 /*
  * nfsd version
  */
-- 
cgit v1.2.3


From c2f1a551dea8b37c2e0cb886885c250fb703e9d8 Mon Sep 17 00:00:00 2001
From: Meelap Shah <meelap@umich.edu>
Date: Tue, 17 Jul 2007 04:04:39 -0700
Subject: knfsd: nfsd4: vary maximum delegation limit based on RAM size

Our original NFSv4 delegation policy was to give out a read delegation on any
open when it was possible to.

Since the lifetime of a delegation isn't limited to that of an open, a client
may quite reasonably hang on to a delegation as long as it has the inode
cached.  This becomes an obvious problem the first time a client's inode cache
approaches the size of the server's total memory.

Our first quick solution was to add a hard-coded limit.  This patch makes a
mild incremental improvement by varying that limit according to the server's
total memory size, allowing at most 4 delegations per megabyte of RAM.

My quick back-of-the-envelope calculation finds that in the worst case (where
every delegation is for a different inode), a delegation could take about
1.5K, which would make the worst case usage about 6% of memory.  The new limit
works out to be about the same as the old on a 1-gig server.

[akpm@linux-foundation.org: Don't needlessly bloat vmlinux]
[akpm@linux-foundation.org: Make it right for highmem machines]
Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/nfsd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 0d8420497765..ce5e345a9bce 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -148,6 +148,7 @@ extern int nfsd_max_blksize;
  * NFSv4 State
  */
 #ifdef CONFIG_NFSD_V4
+extern unsigned int max_delegations;
 void nfs4_state_init(void);
 int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
-- 
cgit v1.2.3


From 47f9940c55c0bdc65188749cae4e841601f513bb Mon Sep 17 00:00:00 2001
From: Meelap Shah <meelap@umich.edu>
Date: Tue, 17 Jul 2007 04:04:40 -0700
Subject: knfsd: nfsd4: don't delegate files that have had conflicts

One more incremental delegation policy improvement: don't give out a
delegation on a file if conflicting access has previously required that a
delegation be revoked on that file.  (In practice we'll forget about the
conflict when the struct nfs4_file is removed on close, so this is of limited
use for now, though it should at least solve a temporary problem with
self-conflicts on write opens from the same client.)

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/state.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 732de9cad4a8..db348f749376 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -224,6 +224,7 @@ struct nfs4_file {
 	struct inode		*fi_inode;
 	u32                     fi_id;      /* used with stateowner->so_id 
 					     * for stateid_hashtbl hash */
+	bool			fi_had_conflict;
 };
 
 /*
-- 
cgit v1.2.3


From c4170583f655dca5da32bd14173d6a93805fc48b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:42 -0700
Subject: knfsd: nfsd4: store pseudoflavor in request

Add a new field to the svc_rqst structure to record the pseudoflavor that the
request was made with.  For now we record the pseudoflavor but don't use it
for anything.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sunrpc/gss_api.h | 1 +
 include/linux/sunrpc/svc.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index bbac101ac372..459c5fc11d51 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -58,6 +58,7 @@ u32 gss_unwrap(
 u32 gss_delete_sec_context(
 		struct gss_ctx		**ctx_id);
 
+u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service);
 u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor);
 char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service);
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 129d50f2225c..705a90aa345e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -212,6 +212,7 @@ struct svc_rqst {
 	struct svc_pool *	rq_pool;	/* thread pool */
 	struct svc_procedure *	rq_procinfo;	/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
+	u32			rq_flavor;	/* pseudoflavor */
 	struct svc_cred		rq_cred;	/* auth info */
 	struct sk_buff *	rq_skbuff;	/* fast recv inet buffer */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
-- 
cgit v1.2.3


From e677bfe4d451f8271986a229270c6eecd1f62b3f Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:42 -0700
Subject: knfsd: nfsd4: parse secinfo information in exports downcall

We add a list of pseudoflavors to each export downcall, which will be used
both as a list of security flavors allowed on that export, and (in the order
given) as the list of pseudoflavors to return on secinfo calls.

This patch parses the new downcall information and adds it to the export
structure, but doesn't use it for anything yet.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 9f62d6182d32..736f0eafcedf 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -42,6 +42,8 @@
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
 #define NFSEXP_ALLFLAGS		0xFE3F
 
+/* The flags that may vary depending on security flavor: */
+#define NFSEXP_SECINFO_FLAGS	0
 
 #ifdef __KERNEL__
 
@@ -64,6 +66,19 @@ struct nfsd4_fs_locations {
 	int migrated;
 };
 
+/*
+ * We keep an array of pseudoflavors with the export, in order from most
+ * to least preferred.  For the forseeable future, we don't expect more
+ * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3,
+ * spkm3i, and spkm3p (and using all 8 at once should be rare).
+ */
+#define MAX_SECINFO_LIST	8
+
+struct exp_flavor_info {
+	u32	pseudoflavor;
+	u32	flags;
+};
+
 struct svc_export {
 	struct cache_head	h;
 	struct auth_domain *	ex_client;
@@ -76,6 +91,8 @@ struct svc_export {
 	int			ex_fsid;
 	unsigned char *		ex_uuid; /* 16 byte fsid */
 	struct nfsd4_fs_locations ex_fslocs;
+	int			ex_nflavors;
+	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
 };
 
 /* an "export key" (expkey) maps a filehandlefragement to an
-- 
cgit v1.2.3


From df547efb03e3e8f9ea726e1d07fbbd6fd0706cd7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:43 -0700
Subject: knfsd: nfsd4: simplify exp_pseudoroot arguments

We're passing three arguments to exp_pseudoroot, two of which are just fields
of the svc_rqst.  Soon we'll want to pass in a third field as well.  So let's
just give up and pass in the whole struct svc_rqst.

Also sneak in some minor style cleanups while we're at it.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 736f0eafcedf..5ed4f277eeac 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -135,7 +135,7 @@ struct svc_export *	exp_parent(struct auth_domain *clp,
 				   struct cache_req *reqp);
 int			exp_rootfh(struct auth_domain *, 
 					char *path, struct knfsd_fh *, int maxsize);
-__be32			exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq);
+__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
 
 extern struct cache_detail svc_export_cache;
-- 
cgit v1.2.3


From 0989a7889695831e49e2c53c1884f52645516a90 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:44 -0700
Subject: knfsd: nfsd: provide export lookup wrappers which take a svc_rqst

Split the callers of exp_get_by_name(), exp_find(), and exp_parent() into
those that are processing requests and those that are doing other stuff (like
looking up filehandles for mountd).

No change in behavior, just a (fairly pointless, on its own) cleanup.

(Note this has the effect of making nfsd_cross_mnt() pass rqstp->rq_client
instead of exp->ex_client into exp_get_by_name().  However, the two should
have the same value at this point.)

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 5ed4f277eeac..1ba53e524749 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -129,10 +129,16 @@ struct svc_export *	exp_get_by_name(struct auth_domain *clp,
 					struct vfsmount *mnt,
 					struct dentry *dentry,
 					struct cache_req *reqp);
+struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
+					     struct vfsmount *,
+					     struct dentry *);
 struct svc_export *	exp_parent(struct auth_domain *clp,
 				   struct vfsmount *mnt,
 				   struct dentry *dentry,
 				   struct cache_req *reqp);
+struct svc_export *	rqst_exp_parent(struct svc_rqst *,
+					struct vfsmount *mnt,
+					struct dentry *dentry);
 int			exp_rootfh(struct auth_domain *, 
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
@@ -152,6 +158,7 @@ static inline void exp_get(struct svc_export *exp)
 extern struct svc_export *
 exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
 	 struct cache_req *reqp);
+struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 3ab4d8b1215d61736e2a9a26bea7cc2e6b029e3d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:46 -0700
Subject: knfsd: nfsd: set rq_client to ip-address-determined-domain

We want it to be possible for users to restrict exports both by IP address and
by pseudoflavor.  The pseudoflavor information has previously been passed
using special auth_domains stored in the rq_client field.  After the preceding
patch that stored the pseudoflavor in rq_flavor, that's now superfluous; so
now we use rq_client for the ip information, as auth_null and auth_unix do.

However, we keep around the special auth_domain in the rq_gssclient field for
backwards compatibility purposes, so we can still do upcalls using the old
"gss/pseudoflavor" auth_domain if upcalls using the unix domain fail to give us an
appropriate export.  This allows us to continue supporting old mountd.

In fact, for this first patch, we always use the "gss/pseudoflavor"
auth_domain (and only it) if it is available; thus rq_client is ignored in the
auth_gss case, and this patch on its own makes no change in behavior; that
will be left to later patches.

Note on idmap: I'm almost tempted to just replace the auth_domain in the idmap
upcall by a dummy value--no version of idmapd has ever used it, and it's
unlikely anyone really wants to perform idmapping differently depending on
where the client is (they may want to perform *credential* mapping
differently, but that's a different matter--the idmapper just handles id's
used in getattr and setattr).  But I'm updating the idmapd code anyway, just
out of general backwards-compatibility paranoia.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sunrpc/svc.h     | 1 +
 include/linux/sunrpc/svcauth.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 705a90aa345e..8531a70da73d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -249,6 +249,7 @@ struct svc_rqst {
 						 */
 	/* Catering to nfsd */
 	struct auth_domain *	rq_client;	/* RPC peer info */
+	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
 	struct svc_cacherep *	rq_cacherep;	/* cache info */
 	struct knfsd_fh *	rq_reffh;	/* Referrence filehandle, used to
 						 * determine what device number
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index de92619b0826..22e1ef8e200e 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -127,6 +127,7 @@ extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
 extern int auth_unix_forget_old(struct auth_domain *dom);
 extern void svcauth_unix_purge(void);
 extern void svcauth_unix_info_release(void *);
+extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
 
 static inline unsigned long hash_str(char *name, int bits)
 {
-- 
cgit v1.2.3


From 32c1eb0cd7ee00b5eb7b6f7059c635fbc1052966 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:48 -0700
Subject: knfsd: nfsd4: return nfserr_wrongsec

Make the first actual use of the secinfo information by using it to return
nfserr_wrongsec when an export is found that doesn't allow the flavor used on
this request.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 1 +
 include/linux/nfsd/nfsd.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 1ba53e524749..424be41130ba 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -116,6 +116,7 @@ struct svc_expkey {
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
 
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 
 /*
  * Function declarations
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index ce5e345a9bce..62499c2f0918 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -236,6 +236,7 @@ void		nfsd_lockd_shutdown(void);
 #define	nfserr_badname		__constant_htonl(NFSERR_BADNAME)
 #define	nfserr_cb_path_down	__constant_htonl(NFSERR_CB_PATH_DOWN)
 #define	nfserr_locked		__constant_htonl(NFSERR_LOCKED)
+#define	nfserr_wrongsec		__constant_htonl(NFSERR_WRONGSEC)
 #define	nfserr_replay_me	__constant_htonl(NFSERR_REPLAY_ME)
 
 /* error codes for internal use */
-- 
cgit v1.2.3


From 0ec757df9743025f14190d6034d8bd2bf37c2dd1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:48 -0700
Subject: knfsd: nfsd4: make readonly access depend on pseudoflavor

Allow readonly access to vary depending on the pseudoflavor, using the flag
passed with each pseudoflavor in the export downcall.  The rest of the flags
are ignored for now, though some day we might also allow id squashing to vary
based on the flavor.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 13 ++++++++++++-
 include/linux/nfsd/nfsd.h   |  3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 424be41130ba..a01f775cb944 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -112,10 +112,21 @@ struct svc_expkey {
 
 #define EX_SECURE(exp)		(!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
 #define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
-#define EX_RDONLY(exp)		((exp)->ex_flags & NFSEXP_READONLY)
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
 
+static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp)
+{
+	struct exp_flavor_info *f;
+	struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+
+	for (f = exp->ex_flavors; f < end; f++) {
+		if (f->pseudoflavor == rqstp->rq_flavor)
+			return f->flags & NFSEXP_READONLY;
+	}
+	return exp->ex_flags & NFSEXP_READONLY;
+}
+
 __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 
 /*
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 62499c2f0918..54ef1a18a56c 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -119,7 +119,8 @@ __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 				struct kstatfs *);
 
 int		nfsd_notify_change(struct inode *, struct iattr *);
-__be32		nfsd_permission(struct svc_export *, struct dentry *, int);
+__be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
+				struct dentry *, int);
 int		nfsd_sync_dir(struct dentry *dp);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-- 
cgit v1.2.3


From dcb488a3b7ac3987e21148f44f641c9b2e734232 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:51 -0700
Subject: knfsd: nfsd4: implement secinfo

Implement the secinfo operation.

(Thanks to Usha Ketineni, who wrote an earlier version of this support.)

Cc: Usha Ketineni <uketinen@us.ibm.com>
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/nfsd.h | 3 +++
 include/linux/nfsd/xdr4.h | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 54ef1a18a56c..e452256d3f72 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -71,6 +71,9 @@ int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 		                struct svc_export **expp);
 __be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
 				const char *, int, struct svc_fh *);
+__be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
+				const char *, int,
+				struct svc_export **, struct dentry **);
 __be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
 				struct iattr *, int, time_t);
 #ifdef CONFIG_NFSD_V4
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 09799bcee0ac..1b653267133a 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -293,6 +293,12 @@ struct nfsd4_rename {
 	struct nfsd4_change_info  rn_tinfo; /* response */
 };
 
+struct nfsd4_secinfo {
+	u32 si_namelen;					/* request */
+	char *si_name;					/* request */
+	struct svc_export *si_exp;			/* response */
+};
+
 struct nfsd4_setattr {
 	stateid_t	sa_stateid;         /* request */
 	u32		sa_bmval[2];        /* request */
@@ -365,6 +371,7 @@ struct nfsd4_op {
 		struct nfsd4_remove		remove;
 		struct nfsd4_rename		rename;
 		clientid_t			renew;
+		struct nfsd4_secinfo		secinfo;
 		struct nfsd4_setattr		setattr;
 		struct nfsd4_setclientid	setclientid;
 		struct nfsd4_setclientid_confirm setclientid_confirm;
-- 
cgit v1.2.3


From 4796f45740bc6f2e3e6cc14e7ed481b38bd0bd39 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:51 -0700
Subject: knfsd: nfsd4: secinfo handling without secinfo= option

We could return some sort of error in the case where someone asks for secinfo
on an export without the secinfo= option set--that'd be no worse than what
we've been doing.  But it's not really correct.  So, hack up an approximate
secinfo response in that case--it may not be complete, but it'll tell the
client at least one acceptable security flavor.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sunrpc/svcauth_gss.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index 5a5db16ab660..417a1def56db 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -22,6 +22,7 @@
 int gss_svc_init(void);
 void gss_svc_shutdown(void);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
+u32 svcauth_gss_flavor(struct auth_domain *dom);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
-- 
cgit v1.2.3


From 1269bc69b6649282091bb7007372acf4ab8357fd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 17 Jul 2007 04:04:52 -0700
Subject: knfsd: nfsd: enforce per-flavor id squashing

Allow root squashing to vary per-pseudoflavor, so that you can (for example)
allow root access only when sufficiently strong security is in use.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index a01f775cb944..78feb7beff75 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -43,7 +43,8 @@
 #define NFSEXP_ALLFLAGS		0xFE3F
 
 /* The flags that may vary depending on security flavor: */
-#define NFSEXP_SECINFO_FLAGS	0
+#define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
+					| NFSEXP_ALLSQUASH)
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From 2e774c7caf84455d5e7d492d123bad6f417818b5 Mon Sep 17 00:00:00 2001
From: Mark Zhan <rongkai.zhan@windriver.com>
Date: Tue, 17 Jul 2007 04:05:05 -0700
Subject: rtc: add support for the ST M48T59 RTC

[akpm@linux-foundation.org: x86_64 build fix]
[akpm@linux-foundation.org: The acpi guys changed the bin_attribute code]
Signed-off-by: Mark Zhan <rongkai.zhan@windriver.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rtc/m48t59.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 include/linux/rtc/m48t59.h

(limited to 'include/linux')

diff --git a/include/linux/rtc/m48t59.h b/include/linux/rtc/m48t59.h
new file mode 100644
index 000000000000..e8c7c21ceb1f
--- /dev/null
+++ b/include/linux/rtc/m48t59.h
@@ -0,0 +1,57 @@
+/*
+ * include/linux/rtc/m48t59.h
+ *
+ * Definitions for the platform data of m48t59 RTC chip driver.
+ *
+ * Copyright (c) 2007 Wind River Systems, Inc.
+ *
+ * Mark Zhan <rongkai.zhan@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_RTC_M48T59_H_
+#define _LINUX_RTC_M48T59_H_
+
+/*
+ * M48T59 Register Offset
+ */
+#define M48T59_YEAR		0x1fff
+#define M48T59_MONTH		0x1ffe
+#define M48T59_MDAY		0x1ffd	/* Day of Month */
+#define M48T59_WDAY		0x1ffc	/* Day of Week */
+#define M48T59_WDAY_CB			0x20	/* Century Bit */
+#define M48T59_WDAY_CEB			0x10	/* Century Enable Bit */
+#define M48T59_HOUR		0x1ffb
+#define M48T59_MIN		0x1ffa
+#define M48T59_SEC		0x1ff9
+#define M48T59_CNTL		0x1ff8
+#define M48T59_CNTL_READ		0x40
+#define M48T59_CNTL_WRITE		0x80
+#define M48T59_WATCHDOG		0x1ff7
+#define M48T59_INTR		0x1ff6
+#define M48T59_INTR_AFE			0x80	/* Alarm Interrupt Enable */
+#define M48T59_INTR_ABE			0x20
+#define M48T59_ALARM_DATE	0x1ff5
+#define M48T59_ALARM_HOUR	0x1ff4
+#define M48T59_ALARM_MIN	0x1ff3
+#define M48T59_ALARM_SEC	0x1ff2
+#define M48T59_UNUSED		0x1ff1
+#define M48T59_FLAGS		0x1ff0
+#define M48T59_FLAGS_WDT		0x80	/* watchdog timer expired */
+#define M48T59_FLAGS_AF			0x40	/* alarm */
+#define M48T59_FLAGS_BF			0x10	/* low battery */
+
+#define M48T59_NVRAM_SIZE	0x1ff0
+
+struct m48t59_plat_data {
+	/* The method to access M48T59 registers,
+	 * NOTE: The 'ofs' should be 0x00~0x1fff
+	 */
+	void (*write_byte)(struct device *dev, u32 ofs, u8 val);
+	unsigned char (*read_byte)(struct device *dev, u32 ofs);
+};
+
+#endif /* _LINUX_RTC_M48T59_H_ */
-- 
cgit v1.2.3


From 623e71b035cb5271028500720b3622ba76db42bb Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Tue, 17 Jul 2007 04:05:28 -0700
Subject: fbcon: allow fbcon to use the primary display driver

Allow fbcon to select the primary display adapter using the
fb_is_primary_device() arch-specific helper.  If a primary adapter is
detected, fbcon will unbind the old adapter from the VT layer, then rebind
using the new adapter.  This requires that bind_/unbind_con_driver() be made
public.

Because this feature may produce unexpected behavior (from the user's POV),
this must be explicitly enabled in Kconfig.

[akpm@linux-foundation.org: export unbind_con_driver]
Signed-off-by: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vt_kern.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index d961635d0e61..0d034d89ee8a 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -75,6 +75,10 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 int vt_waitactive(int vt);
 void change_console(struct vc_data *new_vc);
 void reset_vc(struct vc_data *vc);
+extern int bind_con_driver(const struct consw *csw, int first, int last,
+			   int deflt);
+extern int unbind_con_driver(const struct consw *csw, int first, int last,
+			     int deflt);
 
 /*
  * vc_screen.c shares this temporary buffer with the console write code so that
-- 
cgit v1.2.3


From cfafca8067c6defbaeb28cb898b7b3f8abdfe20d Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jesse.barnes@intel.com>
Date: Tue, 17 Jul 2007 04:05:33 -0700
Subject: fbdev: fbcon: console unregistration from unregister_framebuffer

This allows for proper console unregistration via the VT layer, and updates
the FB layer to use it.  This makes debugging new console drivers much easier,
since you can properly clean them up before unloading.

[adaplas]
unregister_framebuffer() is typically called as part of the driver's
module_exit(). Doing so otherwise will freeze the machine as the VT layer is
holding reference counts on fbcon, and fbcon on the driver.  With this change,
it allows unregister_framebuffer() to be called safely anywhere as needed.

Additions from the original:  If multiple drivers are used by fbcon, and if
one of them unregisters, a driver will take over the consoles vacated by the
outgoing one (via set_con2fb_map).   Once only the outgoing driver remains,
then fbcon will unbind from the VT layer (if CONFIG_HW_CONSOLE_UNBINDING is
set to y).

It is important that these drivers implement fb_open() and fb_release()
just to ensure that no other process is using the driver. Likewise, these
drivers _must_ check the return value of unregister_framebuffer().

[akpm@linux-foundation.org: make fbcon_unbind() stub inline]
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Signed-off-by: Antonino Daplas <adaplas@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 66226824ab68..8628423c6dd3 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -529,6 +529,8 @@ struct fb_cursor_user {
 #define FB_EVENT_CONBLANK               0x0C
 /*      Get drawing requirements        */
 #define FB_EVENT_GET_REQ                0x0D
+/*      Unbind from the console if possible */
+#define FB_EVENT_FB_UNBIND              0x0E
 
 struct fb_event {
 	struct fb_info *info;
-- 
cgit v1.2.3


From b7269dd2b97b9aedb64e15fdec5575345d091925 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Tue, 17 Jul 2007 04:05:34 -0700
Subject: vt: add comment for unbind_con_driver()

- add comment for unbind_con_driver().
- bind_con_driver() is made private again

Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Signed-off-by: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vt_kern.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 0d034d89ee8a..699b7e9864fa 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -75,8 +75,6 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 int vt_waitactive(int vt);
 void change_console(struct vc_data *new_vc);
 void reset_vc(struct vc_data *vc);
-extern int bind_con_driver(const struct consw *csw, int first, int last,
-			   int deflt);
 extern int unbind_con_driver(const struct consw *csw, int first, int last,
 			     int deflt);
 
-- 
cgit v1.2.3


From 9900abfb5e8192f0eafcd9b9dd5d54011e46c76c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Tue, 17 Jul 2007 04:05:50 -0700
Subject: fbdev: Add fb_append_extra_logo()

Add fb_append_extra_logo(), to append extra lines of logos below the standard
Linux logo.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Geoff Levand <geoffrey.levand@am.sony.com>
Acked-By: James Simmons <jsimmons@infradead.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linux_logo.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h
index 9c01bde5bf1b..08a92969c76e 100644
--- a/include/linux/linux_logo.h
+++ b/include/linux/linux_logo.h
@@ -33,5 +33,13 @@ struct linux_logo {
 };
 
 extern const struct linux_logo *fb_find_logo(int depth);
+#ifdef CONFIG_FB_LOGO_EXTRA
+extern void fb_append_extra_logo(const struct linux_logo *logo,
+				 unsigned int n);
+#else
+static inline void fb_append_extra_logo(const struct linux_logo *logo,
+					unsigned int n)
+{}
+#endif
 
 #endif /* _LINUX_LINUX_LOGO_H */
-- 
cgit v1.2.3


From fe0e3a9df6372d357d3fdc4b6265a5417f1e84e8 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@solidboot.com>
Date: Tue, 17 Jul 2007 04:05:55 -0700
Subject: OMAP: add TI OMAP1610 accelerator entry.

Signed-off-by: Trilok Soni <soni.trilok@gmail.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 8628423c6dd3..cec54106aa87 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -119,6 +119,7 @@ struct dentry;
 #define FB_ACCEL_NV_40          46      /* nVidia Arch 40               */
 #define FB_ACCEL_XGI_VOLARI_V	47	/* XGI Volari V3XT, V5, V8      */
 #define FB_ACCEL_XGI_VOLARI_Z	48	/* XGI Volari Z7                */
+#define FB_ACCEL_OMAP1610	49	/* TI OMAP16xx                  */
 #define FB_ACCEL_NEOMAGIC_NM2070 90	/* NeoMagic NM2070              */
 #define FB_ACCEL_NEOMAGIC_NM2090 91	/* NeoMagic NM2090              */
 #define FB_ACCEL_NEOMAGIC_NM2093 92	/* NeoMagic NM2093              */
-- 
cgit v1.2.3


From 713f6ab18b0e7d39f14401362bfe8015b1aedde1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 17 Jul 2007 04:06:12 -0700
Subject: md: improve the is_mddev_idle test fix

Don't use 'unsigned' variable to track sync vs non-sync IO, as the only thing
we want to do with them is a signed comparison, and fix up the comment which
had become quite wrong.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/raid/md_k.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index de72c49747c8..28ac632b42dd 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -51,7 +51,7 @@ struct mdk_rdev_s
 
 	sector_t size;			/* Device size (in blocks) */
 	mddev_t *mddev;			/* RAID array if running */
-	unsigned long last_events;	/* IO event timestamp */
+	long last_events;		/* IO event timestamp */
 
 	struct block_device *bdev;	/* block device handle */
 
-- 
cgit v1.2.3


From 4ad1366376bfef32ec0ffa12d1faa483d6f330bd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 17 Jul 2007 04:06:13 -0700
Subject: md: change bitmap_unplug and others to void functions

bitmap_unplug only ever returns 0, so it may as well be void.  Two callers try
to print a message if it returns non-zero, but that message is already printed
by bitmap_file_kick.

write_page returns an error which is not consistently checked.  It always
causes BITMAP_WRITE_ERROR to be set on an error, and that can more
conveniently be checked.

When the return of write_page is checked, an error causes bitmap_file_kick to
be called - so move that call into write_page - and protect against recursive
calls into bitmap_file_kick.

bitmap_update_sb returns an error that is never checked.

So make these 'void' and be consistent about checking the bit.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/raid/bitmap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index dd5a05d03d4f..75e17a05540e 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -262,7 +262,7 @@ int  bitmap_active(struct bitmap *bitmap);
 
 char *file_path(struct file *file, char *buf, int count);
 void bitmap_print_sb(struct bitmap *bitmap);
-int bitmap_update_sb(struct bitmap *bitmap);
+void bitmap_update_sb(struct bitmap *bitmap);
 
 int  bitmap_setallbits(struct bitmap *bitmap);
 void bitmap_write_all(struct bitmap *bitmap);
@@ -278,8 +278,8 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int d
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
 
-int bitmap_unplug(struct bitmap *bitmap);
-int bitmap_daemon_work(struct bitmap *bitmap);
+void bitmap_unplug(struct bitmap *bitmap);
+void bitmap_daemon_work(struct bitmap *bitmap);
 #endif
 
 #endif
-- 
cgit v1.2.3


From 3bd858ab1c451725c07a805dcb315215dc85b86e Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Tue, 17 Jul 2007 15:00:08 +0530
Subject: Introduce is_owner_or_cap() to wrap CAP_FOWNER use with fsuid check

Introduce is_owner_or_cap() macro in fs.h, and convert over relevant
users to it. This is done because we want to avoid bugs in the future
where we check for only effective fsuid of the current task against a
file's owning uid, without simultaneously checking for CAP_FOWNER as
well, thus violating its semantics.
[ XFS uses special macros and structures, and in general looked ...
untouchable, so we leave it alone -- but it has been looked over. ]

The (current->fsuid != inode->i_uid) check in generic_permission() and
exec_permission_lite() is left alone, because those operations are
covered by CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH. Similarly operations
falling under the purview of CAP_CHOWN and CAP_LEASE are also left alone.

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: Al Viro <viro@ftp.linux.org.uk>
Acked-by: Serge E. Hallyn <serge@hallyn.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 58ce336d4a6b..98205f680476 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -284,6 +284,7 @@ extern int dir_notify_enable;
 #include <linux/pid.h>
 #include <linux/mutex.h>
 #include <linux/sysctl.h>
+#include <linux/capability.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -990,6 +991,9 @@ enum {
 #define put_fs_excl() atomic_dec(&current->fs_excl)
 #define has_fs_excl() atomic_read(&current->fs_excl)
 
+#define is_owner_or_cap(inode)	\
+	((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER))
+
 /* not quite ready to be deprecated, but... */
 extern void lock_super(struct super_block *);
 extern void unlock_super(struct super_block *);
-- 
cgit v1.2.3


From 8dfd588c3180b7403c402b4545164ee4543f8f86 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Tue, 17 Jul 2007 22:29:46 +0100
Subject: smp_call_function_single() should be a macro on UP

... or we end up with header include order problems from hell.

E.g. on m68k this is 100% fatal - local_irq_enable() there
wants preempt_count(), which wants task_struct fields, which
we won't have when we are in smp.h pulled from sched.h.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 8039daced688..259a13c3bd98 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,7 +7,6 @@
  */
 
 #include <linux/errno.h>
-#include <asm/system.h>
 
 extern void cpu_idle(void);
 
@@ -100,15 +99,14 @@ static inline int up_smp_call_function(void)
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
-					   void *info, int retry, int wait)
-{
-	WARN_ON(cpuid != 0);
-	local_irq_disable();
-	func(info);
-	local_irq_enable();
-	return 0;
-}
+#define smp_call_function_single(cpuid, func, info, retry, wait) \
+({ \
+	WARN_ON(cpuid != 0);	\
+	local_irq_disable();	\
+	(func)(info);		\
+	local_irq_enable();	\
+	0;			\
+})
 
 #endif /* !SMP */
 
-- 
cgit v1.2.3


From cb32da0416b823b7f4b65e7e85d6cba16ca4d1e1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 18 Jul 2007 09:18:36 +0900
Subject: slob: Kill off duplicate kzalloc() definition.

With the slab zeroing allocations cleanups Christoph stubbed in a generic
kzalloc(), which was missed on SLOB. Follow the SLAB/SLUB changes and
kill off the __kzalloc() wrapper that SLOB was using.

Reported-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slob_def.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index a2daf2d418a9..59a3fa476ab9 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -33,14 +33,4 @@ static inline void *__kmalloc(size_t size, gfp_t flags)
 	return kmalloc(size, flags);
 }
 
-/**
- * kzalloc - allocate memory. The memory is set to zero.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
- */
-static inline void *kzalloc(size_t size, gfp_t flags)
-{
-	return __kzalloc(size, flags);
-}
-
 #endif /* __LINUX_SLOB_DEF_H */
-- 
cgit v1.2.3


From 97ac73506c0ba93f30239bb57b4cfc5d73e68a62 Mon Sep 17 00:00:00 2001
From: Amit Arora <aarora@in.ibm.com>
Date: Tue, 17 Jul 2007 21:42:44 -0400
Subject: sys_fallocate() implementation on i386, x86_64 and powerpc

fallocate() is a new system call being proposed here which will allow
applications to preallocate space to any file(s) in a file system.
Each file system implementation that wants to use this feature will need
to support an inode operation called ->fallocate().
Applications can use this feature to avoid fragmentation to certain
level and thus get faster access speed. With preallocation, applications
also get a guarantee of space for particular file(s) - even if later the
system becomes full.

Currently, glibc provides an interface called posix_fallocate() which
can be used for a similar purpose. Though this has the advantage of working
on all file systems, it is quite slow (since it writes zeroes to
each block that has to be preallocated). Without a doubt, file systems
can do this more efficiently within the kernel, by implementing
the proposed fallocate() system call. It is expected that
posix_fallocate() will be modified to call this new system call first
and in case the kernel/filesystem does not implement it, it should fall
back to the current implementation of writing zeroes to the new blocks.
ToDos:
1. Implementation on other architectures (other than i386, x86_64,
   and ppc). Patches for s390(x) and ia64 are already available from
   previous posts, but it was decided that they should be added later
   once fallocate is in the mainline. Hence not including those patches
   in this take.
2. Changes to glibc,
   a) to support fallocate() system call
   b) to make posix_fallocate() and posix_fallocate64() call fallocate()

Signed-off-by: Amit Arora <aarora@in.ibm.com>
---
 include/linux/falloc.h   | 6 ++++++
 include/linux/fs.h       | 2 ++
 include/linux/syscalls.h | 1 +
 3 files changed, 9 insertions(+)
 create mode 100644 include/linux/falloc.h

(limited to 'include/linux')

diff --git a/include/linux/falloc.h b/include/linux/falloc.h
new file mode 100644
index 000000000000..8e912ab6a072
--- /dev/null
+++ b/include/linux/falloc.h
@@ -0,0 +1,6 @@
+#ifndef _FALLOC_H_
+#define _FALLOC_H_
+
+#define FALLOC_FL_KEEP_SIZE	0x01 /* default is extend size */
+
+#endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98205f680476..0b806c5e32eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1147,6 +1147,8 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
+	long (*fallocate)(struct inode *inode, int mode, loff_t offset,
+			  loff_t len);
 };
 
 struct seq_file;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83d0ec11235e..7a8b1e3322e0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
 			    const struct itimerspec __user *utmr);
 asmlinkage long sys_eventfd(unsigned int count);
+asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
-- 
cgit v1.2.3


From a2df2a63407803a833f82e1fa6693826c8c9d584 Mon Sep 17 00:00:00 2001
From: Amit Arora <aarora@in.ibm.com>
Date: Tue, 17 Jul 2007 21:42:41 -0400
Subject: fallocate support in ext4

This patch implements ->fallocate() inode operation in ext4. With this
patch users of ext4 file systems will be able to use fallocate() system
call for persistent preallocation. Current implementation only supports
preallocation for regular files (directories not supported as of date)
with extent maps. This patch does not support block-mapped files currently.
Only FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are being supported as of
now.

Signed-off-by: Amit Arora <aarora@in.ibm.com>
---
 include/linux/ext4_fs.h         |  8 ++++++++
 include/linux/ext4_fs_extents.h | 15 +++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f78625a..87c2d7a05b01 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -102,6 +102,7 @@
 				 EXT4_GOOD_OLD_FIRST_INO : \
 				 (s)->s_first_ino)
 #endif
+#define EXT4_BLOCK_ALIGN(size, blkbits)		ALIGN((size), (1 << (blkbits)))
 
 /*
  * Macro-instructions used to manage fragments
@@ -225,6 +226,11 @@ struct ext4_new_group_data {
 	__u32 free_blocks_count;
 };
 
+/*
+ * Following is used by preallocation code to tell get_blocks() that we
+ * want uninitialized extents.
+ */
+#define EXT4_CREATE_UNINITIALIZED_EXT		2
 
 /*
  * ioctl commands
@@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 extern void ext4_ext_truncate(struct inode *, struct page *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
+extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+			  loff_t len);
 static inline int
 ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index acfe59740b03..e3d5afc6f23e 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -188,6 +188,21 @@ ext4_ext_invalidate_cache(struct inode *inode)
 	EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
 }
 
+static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+{
+	ext->ee_len |= cpu_to_le16(0x8000);
+}
+
+static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+{
+	return (int)(le16_to_cpu((ext)->ee_len) & 0x8000);
+}
+
+static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
+{
+	return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF);
+}
+
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
 extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
-- 
cgit v1.2.3


From 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 Mon Sep 17 00:00:00 2001
From: Amit Arora <aarora@in.ibm.com>
Date: Tue, 17 Jul 2007 21:42:38 -0400
Subject: write support for preallocated blocks

This patch adds write support to the uninitialized extents that get
created when a preallocation is done using fallocate(). It takes care of
splitting the extents into multiple (up to three) extents and merging the
new split extents with neighbouring ones, if possible.

Signed-off-by: Amit Arora <aarora@in.ibm.com>
---
 include/linux/ext4_fs_extents.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index e3d5afc6f23e..edf49ec89eac 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_try_to_merge(struct inode *inode,
+				 struct ext4_ext_path *path,
+				 struct ext4_extent *);
 extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
 extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
 extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
-- 
cgit v1.2.3


From e4f48861993294c27849076741eb0c090482560b Mon Sep 17 00:00:00 2001
From: Semih Hazar <semih.hazar@indefia.com>
Date: Wed, 18 Jul 2007 00:35:56 -0400
Subject: Input: ads7846 - introduce sample settling delay

The ads7846 driver has support for filtering, but when the chip gets
deselected between samples this causes noise. This patch adds support
for an optional settling delay time, so that two consecutive samples
will be taken with the specified delay time apart.  This ensures that
the chip won't be deselected, so the noise won't appear.

Filtering can still be done, but will have less work to do since each
time a new sample is taken the same delay applies.

Signed-off-by: Semih Hazar <semih.hazar@indefia.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/spi/ads7846.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 3387e44dfd13..a44fa7a02bd9 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -16,6 +16,14 @@ struct ads7846_platform_data {
 	u16	vref_delay_usecs;	/* 0 for external vref; etc */
 	int	keep_vref_on:1;		/* set to keep vref on for differential
 					 * measurements as well */
+
+	/* Settling time of the analog signals; a function of Vcc and the
+	 * capacitance on the X/Y drivers.  If set to non-zero, two samples
+	 * are taken with settle_delay us apart, and the second one is used.
+	 * ~150 uSec with 0.01uF caps.
+	 */
+	u16	settle_delay_usecs;
+
 	u16	x_plate_ohms;
 	u16	y_plate_ohms;
 
-- 
cgit v1.2.3


From 1d25891f3241103d14ea78236504474a138b8ada Mon Sep 17 00:00:00 2001
From: Semih Hazar <semih.hazar@indefia.com>
Date: Wed, 18 Jul 2007 00:36:04 -0400
Subject: Input: ads7846 - re-check pendown status before reporting events

Pendown status from the PENIRQ pin is currently read only at the beginning
of a sample set. If the pen is lifted just after sampling has begun then
sampled values become wrong.

This patch adds an optional platform penirq_recheck_delay attribute.  If
non-zero, samples are only reported to the input subsystem if PENIRQ is
still active that long after the samples are taken.

Signed-off-by: Semih Hazar <semih.hazar@indefia.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/spi/ads7846.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index a44fa7a02bd9..334d31411629 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -24,6 +24,12 @@ struct ads7846_platform_data {
 	 */
 	u16	settle_delay_usecs;
 
+	/* If set to non-zero, after samples are taken this delay is applied
+	 * and penirq is rechecked, to help avoid false events.  This value
+	 * is affected by the material used to build the touch layer.
+	 */
+	u16	penirq_recheck_delay_usecs;
+
 	u16	x_plate_ohms;
 	u16	y_plate_ohms;
 
-- 
cgit v1.2.3


From 85f202d5df877f8adcda342b74ab11fbdfea753d Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Wed, 18 Jul 2007 00:37:01 -0400
Subject: Input: add driver for Fujitsu serial touchscreens

These serial touchscreens are found on some Fujitsu lifebook
P-series laptops, and the B6210. Using this requires a new
version of inputattach and doing:

 inputattach -fjt /dev/ttyS0

Big thanks to Stephen Hemminger for testing it and making it
work on his B6210 laptop.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/serio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index d9377ce9ffd1..9f3825014674 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -210,5 +210,6 @@ static inline void serio_unpin_driver(struct serio *serio)
 #define SERIO_TOUCHRIGHT	0x32
 #define SERIO_TOUCHWIN	0x33
 #define SERIO_TAOSEVM	0x34
+#define SERIO_FUJITSU	0x35
 
 #endif
-- 
cgit v1.2.3


From 5517853712f1f6daac8a7b2590f9b821e767aa13 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Wed, 18 Jul 2007 00:38:45 -0400
Subject: Input: document intended meaning of KEY_SWITCHVIDEOMODE

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 18c98b543030..e02c6a66b2ba 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -344,7 +344,8 @@ struct input_absinfo {
 #define KEY_BRIGHTNESSUP	225
 #define KEY_MEDIA		226
 
-#define KEY_SWITCHVIDEOMODE	227
+#define KEY_SWITCHVIDEOMODE	227	/* Cycle between available video
+					   outputs (Monitor/LCD/TV-out/etc) */
 #define KEY_KBDILLUMTOGGLE	228
 #define KEY_KBDILLUMDOWN	229
 #define KEY_KBDILLUMUP		230
-- 
cgit v1.2.3


From 456ad75c89cdb72e11dcdb6b0794802a6f50c8a3 Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Wed, 18 Jul 2007 02:10:54 -0700
Subject: [NET]: move dev_mc_discard from dev_mcast.c to dev.c

Because this function is only called by unregister_netdevice,
this moving could make this non-global function static,
and also remove its declaration in netdevice.h;

Furthermore, the function __dev_addr_discard is only called by
dev_mc_discard and dev_unicast_discard; keeping these two functions
both in one c file could make __dev_addr_discard also static
and remove its declaration in netdevice.h;

Furthermore, the sequential calls to dev_unicast_discard and then
dev_mc_discard in unregister_netdevice have a similar mechanism:
(netif_tx_lock_bh / __dev_addr_discard / netif_tx_unlock_bh),
they should be merged into one to eliminate duplicates in acquiring and
releasing the dev->_xmit_lock; this will be done in my following patch.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index da7a13c97eb8..9820ca1e45e2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1098,10 +1098,8 @@ extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void		dev_mc_discard(struct net_device *dev);
 extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
-extern void		__dev_addr_discard(struct dev_addr_list **list);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
-- 
cgit v1.2.3


From ebd61cc042b16e6cf2486aafbfff9e4be8c213ee Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 18 Jul 2007 02:21:50 -0700
Subject: [NETFILTER]: ipt_iprange.h must #include <linux/types.h>

ipt_iprange.h must #include <linux/types.h> since it uses __be32.

This patch fixes kernel Bugzilla #7604.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_iprange.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h
index 34ab0fb736e2..a92fefc3c7ec 100644
--- a/include/linux/netfilter_ipv4/ipt_iprange.h
+++ b/include/linux/netfilter_ipv4/ipt_iprange.h
@@ -1,6 +1,8 @@
 #ifndef _IPT_IPRANGE_H
 #define _IPT_IPRANGE_H
 
+#include <linux/types.h>
+
 #define IPRANGE_SRC		0x01	/* Match source IP address */
 #define IPRANGE_DST		0x02	/* Match destination IP address */
 #define IPRANGE_SRC_INV		0x10	/* Negate the condition */
-- 
cgit v1.2.3


From 749269facaf87f6e516c3af12763e03181b9c139 Mon Sep 17 00:00:00 2001
From: Amit Arora <aarora@in.ibm.com>
Date: Wed, 18 Jul 2007 09:02:56 -0400
Subject: Change on-disk format to support 2^15 uninitialized extents

This change was suggested by Andreas Dilger.
This patch changes the EXT_MAX_LEN value and extent code which marks/checks
uninitialized extents. With this change it will be possible to have
initialized extents with 2^15 blocks (earlier the max blocks we could have
was 2^15 - 1). This way we can have better extent-to-block alignment.
Now, maximum number of blocks we can have in an initialized extent is 2^15
and in an uninitialized extent is 2^15 - 1.

Signed-off-by: Amit Arora <aarora@in.ibm.com>
---
 include/linux/ext4_fs_extents.h | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index edf49ec89eac..81406f3655d4 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
 
 #define EXT_MAX_BLOCK	0xffffffff
 
-#define EXT_MAX_LEN	((1UL << 15) - 1)
+/*
+ * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+ * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
+ * MSB of ee_len field in the extent datastructure to signify if this
+ * particular extent is an initialized extent or an uninitialized (i.e.
+ * preallocated).
+ * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
+ * uninitialized extent.
+ * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
+ * uninitialized one. In other words, if MSB of ee_len is set, it is an
+ * uninitialized extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an uninitialized extent of zero length and
+ * thus we make it as a special case of initialized extent with 0x8000 length.
+ * This way we get better extent-to-group alignment for initialized extents.
+ * Hence, the maximum number of blocks we can have in an *initialized*
+ * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ */
+#define EXT_INIT_MAX_LEN	(1UL << 15)
+#define EXT_UNINIT_MAX_LEN	(EXT_INIT_MAX_LEN - 1)
 
 
 #define EXT_FIRST_EXTENT(__hdr__) \
@@ -190,17 +208,22 @@ ext4_ext_invalidate_cache(struct inode *inode)
 
 static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
 {
-	ext->ee_len |= cpu_to_le16(0x8000);
+	/* We can not have an uninitialized extent of zero length! */
+	BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
+	ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
 }
 
 static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
 {
-	return (int)(le16_to_cpu((ext)->ee_len) & 0x8000);
+	/* Extent with ee_len of 0x8000 is treated as an initialized extent */
+	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
 }
 
 static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 {
-	return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF);
+	return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+		le16_to_cpu(ext->ee_len) :
+		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
 }
 
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
-- 
cgit v1.2.3


From ff9ddf7e847c4dc533f119efb6c77a6e57ab6397 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 18 Jul 2007 09:24:20 -0400
Subject: ext4: copy i_flags to inode flags on write

Propagate flags such as S_APPEND, S_IMMUTABLE, etc. from i_flags into
ext4-specific i_flags.  Quota code changes these flags on quota files
(to make it harder for sysadmin to screw himself) and these changes were
not correctly propagated into the filesystem.

(This is a forward port patch from ext3)

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/ext4_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 87c2d7a05b01..33b2b1a2d790 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -868,6 +868,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern void ext4_truncate (struct inode *);
 extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
-- 
cgit v1.2.3


From e23291b9120c11aafb2ee76fb71a062eb3c1056c Mon Sep 17 00:00:00 2001
From: "Jose R. Santos" <jrs@us.ibm.com>
Date: Wed, 18 Jul 2007 08:57:06 -0400
Subject: jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG

When the JBD code was forked to create the new JBD2 code base, the
references to CONFIG_JBD_DEBUG were never changed to
CONFIG_JBD2_DEBUG.  This patch fixes that.

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/ext4_fs.h    | 4 ++--
 include/linux/ext4_fs_sb.h | 2 +-
 include/linux/jbd2.h       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 33b2b1a2d790..45ec7258b2b1 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -243,7 +243,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_GROUP_ADD		_IOW('f', 8,struct ext4_new_group_input)
 #define	EXT4_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
 #define	EXT4_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
 #define EXT4_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
 #endif
 #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
@@ -259,7 +259,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_GETRSVSZ		_IOR('f', 5, int)
 #define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
 #define EXT4_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
 #endif
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 2347557a327a..0f7dc15924bf 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -73,7 +73,7 @@ struct ext4_sb_info {
 	struct list_head s_orphan;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
 	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
 	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
 #endif
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0e0fedd2039a..a37aca31de46 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -50,11 +50,11 @@
  */
 #define JBD_DEFAULT_MAX_COMMIT_AGE 5
 
-#ifdef CONFIG_JBD_DEBUG
+#ifdef CONFIG_JBD2_DEBUG
 /*
  * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
  * consistency checks.  By default we don't do this unless
- * CONFIG_JBD_DEBUG is on.
+ * CONFIG_JBD2_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
 extern int jbd2_journal_enable_debug;
-- 
cgit v1.2.3


From 0f49d5d019afa4e94253bfc92f0daca3badb990b Mon Sep 17 00:00:00 2001
From: "Jose R. Santos" <jrs@us.ibm.com>
Date: Wed, 18 Jul 2007 08:50:18 -0400
Subject: jbd2: Move jbd2-debug file to debugfs

The jbd2-debug file used to be located in /proc/sys/fs/jbd2-debug, but it
incorrectly used create_proc_entry() instead of the sysctl routines, and
no proc entry was ever created.

Instead of fixing this we might as well move the jbd2-debug file to
debugfs which would be the preferred location for this kind of tunable.
The new location is now /sys/kernel/debug/jbd2/jbd2-debug.

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a37aca31de46..260d6d76c5f3 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -57,7 +57,7 @@
  * CONFIG_JBD2_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
-extern int jbd2_journal_enable_debug;
+extern u8 jbd2_journal_enable_debug;
 
 #define jbd_debug(n, f, a...)						\
 	do {								\
-- 
cgit v1.2.3


From ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80 Mon Sep 17 00:00:00 2001
From: Kalpak Shah <kalpak@clusterfs.com>
Date: Wed, 18 Jul 2007 09:15:20 -0400
Subject: ext4: Add nanosecond timestamps

This patch adds nanosecond timestamps for ext4. This involves adding
*time_extra fields to the ext4_inode to extend the timestamps to
64-bits.  Creation time is also added by this patch.

These extended fields will fit into an inode if the filesystem was
formatted with large inodes (-I 256 or larger) and there are currently
no EAs consuming all of the available space. For new inodes we always
reserve enough space for the kernel's known extended fields, but for
inodes created with an old kernel this might not have been the case. So
this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature
flag (ro-compat so that older kernels can't create inodes with a smaller
extra_isize), which indicates if the fields fitting inside
s_min_extra_isize are available or not.  If the expansion of inodes is
unsuccessful then this feature will be disabled.  This feature is only
enabled if requested by the sysadmin.

None of the extended inode fields is critical for correct filesystem
operation.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/ext4_fs.h    | 86 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/ext4_fs_i.h  |  5 +++
 include/linux/ext4_fs_sb.h |  1 +
 3 files changed, 91 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 45ec7258b2b1..df5e38faa15f 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -288,7 +288,7 @@ struct ext4_inode {
 	__le16	i_uid;		/* Low 16 bits of Owner Uid */
 	__le32	i_size;		/* Size in bytes */
 	__le32	i_atime;	/* Access time */
-	__le32	i_ctime;	/* Creation time */
+	__le32	i_ctime;	/* Inode Change time */
 	__le32	i_mtime;	/* Modification time */
 	__le32	i_dtime;	/* Deletion Time */
 	__le16	i_gid;		/* Low 16 bits of Group Id */
@@ -337,10 +337,85 @@ struct ext4_inode {
 	} osd2;				/* OS dependent 2 */
 	__le16	i_extra_isize;
 	__le16	i_pad1;
+	__le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
+	__le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
+	__le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+	__le32  i_crtime;       /* File Creation time */
+	__le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
 };
 
 #define i_size_high	i_dir_acl
 
+#define EXT4_EPOCH_BITS 2
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
+#define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)
+
+/*
+ * Extended fields will fit into an inode if the filesystem was formatted
+ * with large inodes (-I 256 or larger) and there are not currently any EAs
+ * consuming all of the available space. For new inodes we always reserve
+ * enough space for the kernel's known extended fields, but for inodes
+ * created with an old kernel this might not have been the case. None of
+ * the extended inode fields is critical for correct filesystem operation.
+ * This macro checks if a certain field fits in the inode. Note that
+ * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
+ */
+#define EXT4_FITS_IN_INODE(ext4_inode, einode, field)	\
+	((offsetof(typeof(*ext4_inode), field) +	\
+	  sizeof((ext4_inode)->field))			\
+	<= (EXT4_GOOD_OLD_INODE_SIZE +			\
+	    (einode)->i_extra_isize))			\
+
+static inline __le32 ext4_encode_extra_time(struct timespec *time)
+{
+       return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+			   time->tv_sec >> 32 : 0) |
+			   ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
+}
+
+static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+{
+       if (sizeof(time->tv_sec) > 4)
+	       time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
+			       << 32;
+       time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
+}
+
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)			       \
+do {									       \
+	(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);	       \
+	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
+		(raw_inode)->xtime ## _extra =				       \
+				ext4_encode_extra_time(&(inode)->xtime);       \
+} while (0)
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)			       \
+do {									       \
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
+		(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
+		(raw_inode)->xtime ## _extra =				       \
+				ext4_encode_extra_time(&(einode)->xtime);      \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)			       \
+do {									       \
+	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);       \
+	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
+		ext4_decode_extra_time(&(inode)->xtime,			       \
+				       raw_inode->xtime ## _extra);	       \
+} while (0)
+
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)			       \
+do {									       \
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
+		(einode)->xtime.tv_sec = 				       \
+			(signed)le32_to_cpu((raw_inode)->xtime);	       \
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
+		ext4_decode_extra_time(&(einode)->xtime,		       \
+				       raw_inode->xtime ## _extra);	       \
+} while (0)
+
 #if defined(__KERNEL__) || defined(__linux__)
 #define i_reserved1	osd1.linux1.l_i_reserved1
 #define i_frag		osd2.linux2.l_i_frag
@@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
 	return container_of(inode, struct ext4_inode_info, vfs_inode);
 }
 
+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+	return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
@@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR	0x0004
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 					 EXT4_FEATURE_INCOMPAT_64BIT)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
 
 /*
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 9de494406995..1a511e9905aa 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -153,6 +153,11 @@ struct ext4_inode_info {
 
 	unsigned long i_ext_generation;
 	struct ext4_ext_cache i_cached_extent;
+	/*
+	 * File creation time. Its function is same as that of
+	 * struct timespec i_{a,c,m}time in the generic inode.
+	 */
+	struct timespec i_crtime;
 };
 
 #endif	/* _LINUX_EXT4_FS_I */
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 0f7dc15924bf..1b2ffee12be9 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -81,6 +81,7 @@ struct ext4_sb_info {
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
+	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
 
 #ifdef EXTENTS_STATS
 	/* ext4 extents stats */
-- 
cgit v1.2.3


From 6dd4ee7cab7e3a17c571aebd444f4344c8c4946e Mon Sep 17 00:00:00 2001
From: Kalpak Shah <kalpak@clusterfs.com>
Date: Wed, 18 Jul 2007 09:19:57 -0400
Subject: ext4: Expand extra_inodes space per the s_{want,min}_extra_isize
 fields

We need to make sure that existing ext3 filesystems can also avail the
new fields that have been added to the ext4 inode. We use
s_want_extra_isize and s_min_extra_isize to decide by how much we should
expand the inode. If EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature is set
then we expand the inode by max(s_want_extra_isize, s_min_extra_isize ,
sizeof(ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE) bytes. Actually it is
still an open question about whether users should be able to set
s_*_extra_isize smaller than the known fields or not.

This patch also adds the functionality to expand inodes to include the
newly added fields. We start by trying to expand by s_want_extra_isize
bytes and if it fails we try to expand by s_min_extra_isize bytes. This
is done by changing the i_extra_isize if enough space is available in
the inode and no EAs are present. If EAs are present and there is enough
space in the inode then the EAs in the inode are shifted to make space.
If enough space is not available in the inode due to the EAs then 1 or
more EAs are shifted to the external EA block. In the worst case when
even the external EA block does not have enough space we inform the user
that some EA would need to be deleted or s_min_extra_isize would have to
be reduced.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/ext4_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index df5e38faa15f..52dcc24dd986 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -202,6 +202,7 @@ struct ext4_group_desc
 #define EXT4_STATE_JDATA		0x00000001 /* journaled data exists */
 #define EXT4_STATE_NEW			0x00000002 /* inode is newly created */
 #define EXT4_STATE_XATTR		0x00000004 /* has in-inode xattrs */
+#define EXT4_STATE_NO_EXPAND		0x00000008 /* No space for expansion */
 
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
-- 
cgit v1.2.3


From f8628a14a27eb4512a1ede43de1d9db4d9f92bc3 Mon Sep 17 00:00:00 2001
From: Andreas Dilger <adilger@clusterfs.com>
Date: Wed, 18 Jul 2007 08:38:01 -0400
Subject: ext4: Remove 65000 subdirectory limit

This patch adds support to ext4 for allowing more than 65000
subdirectories. Currently the maximum number of subdirectories is capped
at 32000.

If we exceed 65000 subdirectories in an htree directory it sets the
inode link count to 1 and no longer counts subdirectories.  The
directory link count is not actually used when determining if a
directory is empty, as that only counts subdirectories and not regular
files that might be in there.

An EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added and it is set if
the subdir count for any directory crosses 65000. A later fsck will clear
EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any directories
with >65000 subdirs.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/ext4_fs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 52dcc24dd986..cdee7aaa57aa 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -71,7 +71,7 @@
 /*
  * Maximal count of links to a file
  */
-#define EXT4_LINK_MAX		32000
+#define EXT4_LINK_MAX		65000
 
 /*
  * Macro-instructions used to manage several block sizes
@@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR	0x0004
+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
@@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 					 EXT4_FEATURE_INCOMPAT_64BIT)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
 
-- 
cgit v1.2.3


From b187f180cc942e50007aa039f8e3a620ee5f3171 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Wed, 18 Jul 2007 00:49:10 -0700
Subject: serial: add early_serial_setup() back to header file

early_serial_setup() was removed from serial.h, but its declaration was
never added to serial_8250.h.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_8250.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 706ee9a4c80c..8518fa2a6f89 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -60,6 +60,8 @@ void serial8250_unregister_port(int line);
 void serial8250_suspend_port(int line);
 void serial8250_resume_port(int line);
 
+extern int early_serial_setup(struct uart_port *port);
+
 extern int serial8250_find_port(struct uart_port *p);
 extern int serial8250_find_port_for_earlycon(void);
 extern int setup_early_serial8250_console(char *cmdline);
-- 
cgit v1.2.3


From 8b4a40809e5330c9da5d20107d693d92d73b31dc Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 18 Jul 2007 00:49:11 -0700
Subject: zs: move to the serial subsystem

This is a reimplementation of the zs driver for the serial subsystem.  Any
resemblance to the old driver is purely coincidental.  ;-) I do hope I got
the handling of modem lines right -- better do not tackle me about the
issue unless you feel too good...

Any users of the old driver: please note the numbers of the serial lines
have now been swapped, i.e.  ttyS0 <-> ttyS1 and ttyS2 <-> ttyS3.  It has
to do with the modem lines mentioned above; basically the port A in a given
chip has to be initialised before the port B if you want to use the latter
as the serial console (which is usually the case), as operations on modem
lines of the serial line associated with the port B access both ports (see
the comment at the top of the driver for the details of wiring used).
Please update your scripts.

This is also the reason each SCC now requests an IRQ once only (as seen in
"/proc/interrupts") -- the handler takes care of both ports at once as the
line associated with the port B has to take status update interrupts from
both ports (and yet the line of the port A takes its own for itself too).
The old driver never got it right...

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_core.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 9c721cd2c9d6..773d8d8828ad 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -62,8 +62,9 @@
 /* NEC v850.  */
 #define PORT_V850E_UART	40
 
-/* DZ */
-#define PORT_DZ		47
+/* DEC */
+#define PORT_DZ		46
+#define PORT_ZS		47
 
 /* Parisc type numbers. */
 #define PORT_MUX	48
-- 
cgit v1.2.3


From 1e66df3ee301209f4a38df097d7cc5cb9b367a3f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:02 -0700
Subject: add kstrndup

Add a kstrndup function, modelled on strndup.  Like strndup this
returns a string copied into its own allocated memory, but it copies
no more than the specified number of bytes from the source.

Remove private strndup() from irda code.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Panagiotis Issaris <takis@issaris.org>
Cc: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 include/linux/string.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 7f2eb6a477f9..ee5e9ccc4aae 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -105,6 +105,7 @@ extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 
 extern char *kstrdup(const char *s, gfp_t gfp);
+extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
 extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
 
 #ifdef __cplusplus
-- 
cgit v1.2.3


From d84d1cc7647c7e4f77d517e2d87b4a106a0420d9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:02 -0700
Subject: add argv_split()

argv_split() is a helper function which takes a string, splits it at
whitespace, and returns a NULL-terminated argv vector.  This is
deliberately simple - it does no quote processing of any kind.

[ Seems to me that this is something which is already being done in
  the kernel, but I couldn't find any other implementations, either to
  steal or replace.  Keep an eye out. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/string.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index ee5e9ccc4aae..836062b7582a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -108,6 +108,9 @@ extern char *kstrdup(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
 extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
 
+extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
+extern void argv_free(char **argv);
+
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 0ab4dc92278a0f3816e486d6350c6652a72e06c8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:02 -0700
Subject: usermodehelper: split setup from execution

Rather than having hundreds of variations of call_usermodehelper for
various pieces of usermode state which could be set up, split the
info allocation and initialization from the actual process execution.

This means the general pattern becomes:
 info = call_usermodehelper_setup(path, argv, envp); /* basic state */
 call_usermodehelper_<SET EXTRA STATE>(info, stuff...);	/* extra state */
 call_usermodehelper_exec(info, wait);	/* run process and free info */

This patch introduces wrappers for all the existing calling styles for
call_usermodehelper_*, but folds their implementations into one.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: David Howells <dhowells@redhat.com>
Cc: Björn Steinbrink <B.Steinbrink@gmx.de>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/kmod.h | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 10f505c8431d..c4cbe59d9c67 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -36,13 +36,51 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; }
 #define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
 
 struct key;
-extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[],
-				    struct key *session_keyring, int wait);
+struct file;
+struct subprocess_info;
+
+/* Allocate a subprocess_info structure */
+struct subprocess_info *call_usermodehelper_setup(char *path,
+						  char **argv, char **envp);
+
+/* Set various pieces of state into the subprocess_info structure */
+void call_usermodehelper_setkeys(struct subprocess_info *info,
+				 struct key *session_keyring);
+int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
+				  struct file **filp);
+void call_usermodehelper_setcleanup(struct subprocess_info *info,
+				    void (*cleanup)(char **argv, char **envp));
+
+/* Actually execute the sub-process */
+int call_usermodehelper_exec(struct subprocess_info *info, int wait);
+
+/* Free the subprocess_info. This is only needed if you're not going
+   to call call_usermodehelper_exec */
+void call_usermodehelper_freeinfo(struct subprocess_info *info);
 
 static inline int
 call_usermodehelper(char *path, char **argv, char **envp, int wait)
 {
-	return call_usermodehelper_keys(path, argv, envp, NULL, wait);
+	struct subprocess_info *info;
+
+	info = call_usermodehelper_setup(path, argv, envp);
+	if (info == NULL)
+		return -ENOMEM;
+	return call_usermodehelper_exec(info, wait);
+}
+
+static inline int
+call_usermodehelper_keys(char *path, char **argv, char **envp,
+			 struct key *session_keyring, int wait)
+{
+	struct subprocess_info *info;
+
+	info = call_usermodehelper_setup(path, argv, envp);
+	if (info == NULL)
+		return -ENOMEM;
+
+	call_usermodehelper_setkeys(info, session_keyring);
+	return call_usermodehelper_exec(info, wait);
 }
 
 extern void usermodehelper_init(void);
-- 
cgit v1.2.3


From 10a0a8d4e3f6bf2d077f94344441909abe670f5a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:02 -0700
Subject: Add common orderly_poweroff()

Various pieces of code around the kernel want to be able to trigger an
orderly poweroff.  This pulls them together into a single
implementation.

By default the poweroff command is /sbin/poweroff, but it can be set
via sysctl: kernel/poweroff_cmd.  This is split at whitespace, so it
can include command-line arguments.

This patch replaces four other instances of invoking either "poweroff"
or "shutdown -h now": two sbus drivers, and acpi thermal
management.

sparc64 has its own "powerd"; still need to determine whether it should
be replaced by orderly_poweroff().

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Acked-by: Len Brown <lenb@kernel.org>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David S. Miller <davem@davemloft.net>
---
 include/linux/reboot.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 1dd1c707311f..85ea63f462af 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -67,6 +67,11 @@ extern void kernel_power_off(void);
 
 void ctrl_alt_del(void);
 
+#define POWEROFF_CMD_PATH_LEN	256
+extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
+
+extern int orderly_poweroff(bool force);
+
 /*
  * Emergency restart, callable from an interrupt handler.
  */
-- 
cgit v1.2.3


From 86313c488a6848b7ec2ba04e74f25f79dd32a0b7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:03 -0700
Subject: usermodehelper: Tidy up waiting

Rather than using a tri-state integer for the wait flag in
call_usermodehelper_exec, define a proper enum, and use that.  I've
preserved the integer values so that any callers I've missed should
still work OK.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Joel Becker <joel.becker@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: David Howells <dhowells@redhat.com>
---
 include/linux/kmod.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index c4cbe59d9c67..5dc13848891b 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -51,15 +51,21 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
 void call_usermodehelper_setcleanup(struct subprocess_info *info,
 				    void (*cleanup)(char **argv, char **envp));
 
+enum umh_wait {
+	UMH_NO_WAIT = -1,	/* don't wait at all */
+	UMH_WAIT_EXEC = 0,	/* wait for the exec, but not the process */
+	UMH_WAIT_PROC = 1,	/* wait for the process to complete */
+};
+
 /* Actually execute the sub-process */
-int call_usermodehelper_exec(struct subprocess_info *info, int wait);
+int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
 
 /* Free the subprocess_info. This is only needed if you're not going
    to call call_usermodehelper_exec */
 void call_usermodehelper_freeinfo(struct subprocess_info *info);
 
 static inline int
-call_usermodehelper(char *path, char **argv, char **envp, int wait)
+call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
 {
 	struct subprocess_info *info;
 
@@ -71,7 +77,7 @@ call_usermodehelper(char *path, char **argv, char **envp, int wait)
 
 static inline int
 call_usermodehelper_keys(char *path, char **argv, char **envp,
-			 struct key *session_keyring, int wait)
+			 struct key *session_keyring, enum umh_wait wait)
 {
 	struct subprocess_info *info;
 
-- 
cgit v1.2.3


From 810bab448e563ffd1718d78e9a3756806b626acc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:03 -0700
Subject: use elfnote.h to generate vsyscall notes.

Use existing elfnote.h to generate vsyscall notes, rather than doing
it locally.  Changes elfnote.h a bit to suit, since this is the first
asm user, and it wasn't quite right.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.com>
---
 include/linux/elfnote.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
index 9a1e0674e56c..e831759b2fb5 100644
--- a/include/linux/elfnote.h
+++ b/include/linux/elfnote.h
@@ -38,17 +38,25 @@
  * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
  *      ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
  */
-#define ELFNOTE(name, type, desctype, descdata)	\
-.pushsection .note.name, "",@note	;	\
-  .align 4				;	\
+#define ELFNOTE_START(name, type, flags)	\
+.pushsection .note.name, flags,@note	;	\
+  .balign 4				;	\
   .long 2f - 1f		/* namesz */	;	\
-  .long 4f - 3f		/* descsz */	;	\
+  .long 4484f - 3f	/* descsz */	;	\
   .long type				;	\
 1:.asciz #name				;	\
-2:.align 4				;	\
-3:desctype descdata			;	\
-4:.align 4				;	\
+2:.balign 4				;	\
+3:
+
+#define ELFNOTE_END				\
+4484:.balign 4				;	\
 .popsection				;
+
+#define ELFNOTE(name, type, desc)		\
+	ELFNOTE_START(name, type, "")		\
+		desc			;	\
+	ELFNOTE_END
+
 #else	/* !__ASSEMBLER__ */
 #include <linux/elf.h>
 /*
-- 
cgit v1.2.3


From 5f4352fbffd6c45123dbce9e195efd54df4e177e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:04 -0700
Subject: Allocate and free vmalloc areas

Allocate/release a chunk of vmalloc address space:
 alloc_vm_area reserves a chunk of address space, and makes sure all
 the pagetables are constructed for that address range - but no pages.

 free_vm_area releases the address space range.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ian Pratt <ian.pratt@xensource.com>
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: "Andi Kleen" <ak@muc.de>
---
 include/linux/vmalloc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 132b260aef1e..c2b10cae5da5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -70,6 +70,10 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
 extern void unmap_kernel_range(unsigned long addr, unsigned long size);
 
+/* Allocate/destroy a 'vmalloc' VM area. */
+extern struct vm_struct *alloc_vm_area(size_t size);
+extern void free_vm_area(struct vm_struct *area);
+
 /*
  *	Internals.  Dont't use..
  */
-- 
cgit v1.2.3


From c85b04c3749507546f6d5868976e4793e35c2ec0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:05 -0700
Subject: xen: add pinned page flag

Add a new definition for PG_owner_priv_1 to define PG_pinned on Xen
pagetable pages.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
---
 include/linux/page-flags.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ae2d79f2107e..731cd2ac3227 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -92,6 +92,7 @@
 
 /* PG_owner_priv_1 users should have descriptive aliases */
 #define PG_checked		PG_owner_priv_1 /* Used by some filesystems */
+#define PG_pinned		PG_owner_priv_1	/* Xen pinned pagetable */
 
 #if (BITS_PER_LONG > 32)
 /*
@@ -170,6 +171,10 @@ static inline void SetPageUptodate(struct page *page)
 #define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
 #define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
 
+#define PagePinned(page)	test_bit(PG_pinned, &(page)->flags)
+#define SetPagePinned(page)	set_bit(PG_pinned, &(page)->flags)
+#define ClearPagePinned(page)	clear_bit(PG_pinned, &(page)->flags)
+
 #define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
 #define SetPageReserved(page)	set_bit(PG_reserved, &(page)->flags)
 #define ClearPageReserved(page)	clear_bit(PG_reserved, &(page)->flags)
-- 
cgit v1.2.3


From 9f27ee595038653ddf8bca871200d39247d6f4fc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Tue, 17 Jul 2007 18:37:06 -0700
Subject: xen: add virtual block device driver.

The block device frontend driver allows the kernel to access block
devices exported by a virtual machine containing a physical
block device driver.

Signed-off-by: Ian Pratt <ian.pratt@xensource.com>
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Greg KH <greg@kroah.com>
Cc: Jens Axboe <axboe@kernel.dk>
---
 include/linux/major.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/major.h b/include/linux/major.h
index 7e7c9093919a..0cb98053537a 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -158,6 +158,8 @@
 #define VXSPEC_MAJOR		200	/* VERITAS volume config driver */
 #define VXDMP_MAJOR		201	/* VERITAS volume multipath driver */
 
+#define XENVBD_MAJOR		202	/* Xen virtual block device */
+
 #define MSR_MAJOR		202
 #define CPUID_MAJOR		203
 
-- 
cgit v1.2.3


From 1c50dc83f9ca752b1e1b985f1ce33d2695103ffa Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Tue, 30 Jan 2007 01:18:41 -0800
Subject: [SCSI] sas_ata: ata_post_internal should abort the sas_task

This patch adds a new field, lldd_task, to ata_queued_cmd so that libata
users such as libsas can associate some data with a qc.  The particular
ambition with this patch is to associate a sas_task with a qc; that way,
if libata decides to timeout a command, we can come back (in
sas_ata_post_internal) and abort the sas task.

One question remains: Is it necessary to reset the phy on error, or will
the libata error handler take care of it?  (Assuming that one is written,
of course.)  This patch, as it is today, works well enough to clean
things up when an ATA device probe attempt fails halfway through the probe,
though I'm not sure this is always the right thing to do.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 47cd2a1c5544..4abb758a0450 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -411,6 +411,7 @@ struct ata_queued_cmd {
 	ata_qc_cb_t		complete_fn;
 
 	void			*private_data;
+	void			*lldd_task;
 };
 
 struct ata_port_stats {
-- 
cgit v1.2.3


From 0c8db6beb81a07147f64cffd33bd43b9e96f4f40 Mon Sep 17 00:00:00 2001
From: "Prakash, Sathya" <sathya.prakash@lsi.com>
Date: Tue, 17 Jul 2007 13:40:10 +0530
Subject: [SCSI] add PCI_VENDOR_ID macro for Brocade in pci_ids.h

Adds PCI_VENDOR_ID_BROCADE macro in include/linux/pci_ids.h file. This macro
is used in MPT Fusion FC drivers to support Brocade branded FC controllers

signed-off-by: Sathya Prakash <sathya.prakash@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2c7add169539..13d36bb01a42 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2017,6 +2017,8 @@
 
 #define PCI_VENDOR_ID_ARIMA		0x161f
 
+#define PCI_VENDOR_ID_BROCADE		0x1657
+
 #define PCI_VENDOR_ID_SIBYTE		0x166d
 #define PCI_DEVICE_ID_BCM1250_PCI	0x0001
 #define PCI_DEVICE_ID_BCM1250_HT	0x0002
-- 
cgit v1.2.3


From 7132ab7f6e0309bb8e0424e395ba149aee0c750e Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Wed, 11 Jul 2007 11:43:07 -0500
Subject: Fix RGMII-ID handling in gianfar

The TSEC/eTSEC can detect the interface to the PHY automatically,
but it isn't able to detect whether the RGMII connection needs internal
delay.  So we need to detect that change in the device tree, propagate
it to the platform data, and then check it if we're in RGMII.  This fixes
a bug on the 8641D HPCN board where the Vitesse PHY doesn't use the delay
for RGMII.

Signed-off-by: Andy Fleming <afleming@freescale.com>
---
 include/linux/fsl_devices.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 695741b0e420..1831b196c70a 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -53,6 +53,7 @@ struct gianfar_platform_data {
 	u32	bus_id;
 	u32	phy_id;
 	u8	mac_addr[6];
+	phy_interface_t interface;
 };
 
 struct gianfar_mdio_data {
-- 
cgit v1.2.3


From b4ff4f0419ae5db83553fab79d03a89c10d540a8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 18 Jul 2007 15:46:06 -0700
Subject: [NETLINK]: allocate group bitmaps dynamically

Allow changing the number of groups for a netlink family
after it has been created, use RCU to protect the listeners
bitmap keeping netlink_has_listeners() lock-free.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 2e23353c28a5..b971ddd24090 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -161,6 +161,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
 					  void (*input)(struct sock *sk, int len),
 					  struct mutex *cb_mutex,
 					  struct module *module);
+extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
-- 
cgit v1.2.3


From 84659eb529b33572bb3f8c94e0978bd5d084bc7e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 18 Jul 2007 15:47:05 -0700
Subject: [NETLINK]: Allow removing multicast groups.

Allow kicking listeners out of a multicast group when necessary
(for example if that group is going to be removed.)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b971ddd24090..83d8239f0cce 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -162,6 +162,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
 					  struct mutex *cb_mutex,
 					  struct module *module);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
+extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
-- 
cgit v1.2.3


From 2dbba6f773d1e1e4c78f03b0dbf19790d9017693 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 18 Jul 2007 15:47:52 -0700
Subject: [GENETLINK]: Dynamic multicast groups.

Introduce API to dynamically register and unregister multicast groups.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index f7a93770e1be..7da02c93002b 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -39,6 +39,9 @@ enum {
 	CTRL_CMD_NEWOPS,
 	CTRL_CMD_DELOPS,
 	CTRL_CMD_GETOPS,
+	CTRL_CMD_NEWMCAST_GRP,
+	CTRL_CMD_DELMCAST_GRP,
+	CTRL_CMD_GETMCAST_GRP, /* unused */
 	__CTRL_CMD_MAX,
 };
 
@@ -52,6 +55,7 @@ enum {
 	CTRL_ATTR_HDRSIZE,
 	CTRL_ATTR_MAXATTR,
 	CTRL_ATTR_OPS,
+	CTRL_ATTR_MCAST_GROUPS,
 	__CTRL_ATTR_MAX,
 };
 
@@ -66,4 +70,13 @@ enum {
 
 #define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
 
+enum {
+	CTRL_ATTR_MCAST_GRP_UNSPEC,
+	CTRL_ATTR_MCAST_GRP_NAME,
+	CTRL_ATTR_MCAST_GRP_ID,
+	__CTRL_ATTR_MCAST_GRP_MAX,
+};
+
+#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+
 #endif	/* __LINUX_GENERIC_NETLINK_H */
-- 
cgit v1.2.3


From 60a96a59569bab85571d0089682109bd3324e896 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Sun, 8 Jul 2007 22:29:26 +0200
Subject: Driver core: accept all valid action-strings in uevent-trigger

This allows the uevent file to handle any type of uevent action to be
triggered by userspace instead of just the "add" uevent.


Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 06cbf41d32d2..aa2fe22b1baa 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -36,15 +36,24 @@ extern char uevent_helper[];
 /* counter to tag the uevent, read only except for the kobject core */
 extern u64 uevent_seqnum;
 
-/* the actions here must match the proper string in lib/kobject_uevent.c */
-typedef int __bitwise kobject_action_t;
+/*
+ * The actions here must match the index to the string array
+ * in lib/kobject_uevent.c
+ *
+ * Do not add new actions here without checking with the driver-core
+ * maintainers. Action strings are not meant to express subsystem
+ * or device specific properties. In most cases you want to send a
+ * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event
+ * specific variables added to the event environment.
+ */
 enum kobject_action {
-	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* exclusive to core */
-	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* exclusive to core */
-	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* device state change */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* device offline */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* device online */
-	KOBJ_MOVE	= (__force kobject_action_t) 0x06,	/* device move */
+	KOBJ_ADD,
+	KOBJ_REMOVE,
+	KOBJ_CHANGE,
+	KOBJ_MOVE,
+	KOBJ_ONLINE,
+	KOBJ_OFFLINE,
+	KOBJ_MAX
 };
 
 struct kobject {
-- 
cgit v1.2.3


From 3f8df781fc5f9ee5253a54ba669e1c8872844b86 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 12 Jul 2007 16:57:22 -0400
Subject: PM: remove deprecated dpm_runtime_* routines

This patch (as933) removes the deprecated dpm_runtime_suspend() and
dpm_runtime_resume() routines from the PM core.  The only user of
those routines is the PCMCIA ds driver; local replacements are added.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 273781c82e4d..2735b7cadd20 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -284,8 +284,6 @@ extern int device_prepare_suspend(pm_message_t state);
 #define device_may_wakeup(dev) \
 	(device_can_wakeup(dev) && (dev)->power.should_wakeup)
 
-extern int dpm_runtime_suspend(struct device *, pm_message_t);
-extern void dpm_runtime_resume(struct device *);
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
 #define suspend_report_result(fn, ret)					\
@@ -317,15 +315,6 @@ static inline int device_suspend(pm_message_t state)
 #define device_set_wakeup_enable(dev,val)	do{}while(0)
 #define device_may_wakeup(dev)			(0)
 
-static inline int dpm_runtime_suspend(struct device * dev, pm_message_t state)
-{
-	return 0;
-}
-
-static inline void dpm_runtime_resume(struct device * dev)
-{
-}
-
 #define suspend_report_result(fn, ret) do { } while (0)
 
 static inline int call_platform_enable_wakeup(struct device *dev, int is_on)
-- 
cgit v1.2.3


From aebdc3b450a3febf7d7d00cd2235509055ec7082 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Thu, 12 Jul 2007 22:08:22 -0700
Subject: dev_vdbg(), available with -DVERBOSE_DEBUG

This defines a dev_vdbg() call, which is enabled with -DVERBOSE_DEBUG.
When enabled, dev_vdbg() acts just like dev_dbg().  When disabled, it is a
NOP ...  just like dev_dbg() without -DDEBUG.  The specific code was moved
out of a USB patch, but lots of drivers have similar support.

That is, code can now be written to use an additional level of debug
output, selected at compile time.  Many driver authors have found this
idiom to be very useful.  A typical usage model is for "normal" debug
messages to focus on fault paths and not be very "chatty", so that those
messages can be left on during normal operation without much of a
performance or syslog load.  On the other hand "verbose" messages would be
noisy enough that they wouldn't normally be enabled; they might even affect
timings enough to change system or driver behavior.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index be2debed70d2..d9f0a57f5a2f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -572,6 +572,16 @@ dev_dbg(struct device * dev, const char * fmt, ...)
 }
 #endif
 
+#ifdef VERBOSE_DEBUG
+#define dev_vdbg	dev_dbg
+#else
+static inline int __attribute__ ((format (printf, 2, 3)))
+dev_vdbg(struct device * dev, const char * fmt, ...)
+{
+	return 0;
+}
+#endif
+
 #define dev_err(dev, format, arg...)		\
 	dev_printk(KERN_ERR , dev , format , ## arg)
 #define dev_info(dev, format, arg...)		\
-- 
cgit v1.2.3


From beafc54c4e2fba24e1ca45cdb7f79d9aa83e3db1 Mon Sep 17 00:00:00 2001
From: "Hans J. Koch" <hjk@linutronix.de>
Date: Thu, 7 Dec 2006 10:58:29 +0100
Subject: UIO: Add the User IO core code

This interface makes it possible to write the majority of a driver in
userspace with only a very small shell of a driver in the kernel itself.
It uses a char device and sysfs to interact with a userspace process to
process interrupts and control memory accesses.

See the docbook documentation for more details on how to use this
interface.

From: Hans J. Koch <hjk@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/uio_driver.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 include/linux/uio_driver.h

(limited to 'include/linux')

diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
new file mode 100644
index 000000000000..44c28e94df50
--- /dev/null
+++ b/include/linux/uio_driver.h
@@ -0,0 +1,91 @@
+/*
+ * include/linux/uio_driver.h
+ *
+ * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de>
+ * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de>
+ * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * Userspace IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_DRIVER_H_
+#define _UIO_DRIVER_H_
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+
+/**
+ * struct uio_mem - description of a UIO memory region
+ * @kobj:		kobject for this mapping
+ * @addr:		address of the device's memory
+ * @size:		size of IO
+ * @memtype:		type of memory addr points to
+ * @internal_addr:	ioremap-ped version of addr, for driver internal use
+ */
+struct uio_mem {
+	struct kobject		kobj;
+	unsigned long		addr;
+	unsigned long		size;
+	int			memtype;
+	void __iomem		*internal_addr;
+};
+
+#define MAX_UIO_MAPS 	5
+
+struct uio_device;
+
+/**
+ * struct uio_info - UIO device capabilities
+ * @uio_dev:		the UIO device this info belongs to
+ * @name:		device name
+ * @version:		device driver version
+ * @mem:		list of mappable memory regions, size==0 for end of list
+ * @irq:		interrupt number or UIO_IRQ_CUSTOM
+ * @irq_flags:		flags for request_irq()
+ * @priv:		optional private data
+ * @handler:		the device's irq handler
+ * @mmap:		mmap operation for this uio device
+ * @open:		open operation for this uio device
+ * @release:		release operation for this uio device
+ */
+struct uio_info {
+	struct uio_device	*uio_dev;
+	char			*name;
+	char			*version;
+	struct uio_mem		mem[MAX_UIO_MAPS];
+	long			irq;
+	unsigned long		irq_flags;
+	void			*priv;
+	irqreturn_t (*handler)(int irq, struct uio_info *dev_info);
+	int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);
+	int (*open)(struct uio_info *info, struct inode *inode);
+	int (*release)(struct uio_info *info, struct inode *inode);
+};
+
+extern int __must_check
+	__uio_register_device(struct module *owner,
+			      struct device *parent,
+			      struct uio_info *info);
+static inline int __must_check
+	uio_register_device(struct device *parent, struct uio_info *info)
+{
+	return __uio_register_device(THIS_MODULE, parent, info);
+}
+extern void uio_unregister_device(struct uio_info *info);
+extern void uio_event_notify(struct uio_info *info);
+
+/* defines for uio_info->irq */
+#define UIO_IRQ_CUSTOM	-1
+#define UIO_IRQ_NONE	-2
+
+/* defines for uio_mem->memtype */
+#define UIO_MEM_NONE	0
+#define UIO_MEM_PHYS	1
+#define UIO_MEM_LOGICAL	2
+#define UIO_MEM_VIRTUAL 3
+
+#endif /* _UIO_DRIVER_H_ */
-- 
cgit v1.2.3


From a9933cea7a1d80dd9efae9f1acd857f5dce742b9 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 7 Jun 2007 17:09:49 -0400
Subject: locks: rename lease functions to reflect locks.c conventions

We've been using the convention that vfs_foo is the function that calls
a filesystem-specific foo method if it exists, or falls back on a
generic method if it doesn't; thus vfs_foo is what is called when some
other part of the kernel (normally lockd or nfsd) wants to get a lock,
whereas foo is what filesystems call to use the underlying local
functionality as part of their lock implementation.

So rename setlease to vfs_setlease (which will call a
filesystem-specific setlease after a later patch) and __setlease to
setlease.

Also, vfs_setlease need only be GPL-exported as long as it's only needed
by lockd and nfsd.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98205f680476..a24f029accc0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,7 +872,7 @@ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
-extern int setlease(struct file *, long, struct file_lock **);
+extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
-- 
cgit v1.2.3


From f9ffed26d6f3e6ac9988947242821579d615fda7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 14 Nov 2006 15:51:40 -0500
Subject: locks: provide a file lease method enabling cluster-coherent leases

Currently leases are only kept locally, so there's no way for a distributed
filesystem to enforce them against multiple clients.  We're particularly
interested in the case of nfsd exporting a cluster filesystem, in which
case nfsd needs cluster-coherent leases in order to implement delegations
correctly.

Also add some documentation.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a24f029accc0..c8ddf34e9710 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1122,6 +1122,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+	int (*setlease)(struct file *, long, struct file_lock **);
 };
 
 struct inode_operations {
-- 
cgit v1.2.3


From 4698afe8e3a725576366f86560a8a8242b21b9f7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 4 Jul 2007 17:21:37 -0400
Subject: locks: export setlease to filesystems

Export setlease so it can be used by filesystems to implement their lease
methods.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c8ddf34e9710..b188c2e5338d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,6 +872,7 @@ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
+extern int setlease(struct file *, long, struct file_lock **);
 extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
-- 
cgit v1.2.3


From 6d34ac199a4af5c678a3a8f3275aeb2586b72da3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 11 May 2007 16:09:32 -0400
Subject: locks: make posix_test_lock() interface more consistent

Since posix_test_lock(), like fcntl() and ->lock(), indicates absence or
presence of a conflict lock by setting fl_type to, respectively, F_UNLCK
or something other than F_UNLCK, the return value is no longer needed.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b188c2e5338d..80deaaf1b746 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -862,7 +862,7 @@ extern void locks_init_lock(struct file_lock *);
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
-extern int posix_test_lock(struct file *, struct file_lock *);
+extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
 extern int posix_unblock_lock(struct file *, struct file_lock *);
-- 
cgit v1.2.3


From 5417169026c3df151adf5a65eb061278b0a72e69 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 19 Jul 2007 17:39:55 +1000
Subject: [FS] Implement block_page_mkwrite.

Many filesystems need a ->page_mkwrite callout to correctly
set up pages that have been written to by mmap. This is especially
important when mmap is writing into holes as it allows filesystems
to correctly account for and allocate space before the mmap
write is allowed to proceed.

Protection against truncate races is provided by locking the page
and checking to see whether the page mapping is correct and whether
it is beyond EOF so we don't end up allowing allocations beyond
the current EOF or changing EOF as a result of a mmap write.

SGI-PV: 940392
SGI-Modid: 2.6.x-xfs-melb:linux:29146a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 include/linux/buffer_head.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5c6e12853a9b..35cadad84b14 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -209,6 +209,8 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 int generic_cont_expand(struct inode *inode, loff_t size);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
+int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+				get_block_t get_block);
 void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
-- 
cgit v1.2.3


From d00806b183152af6d24f46f0c33f14162ca1262a Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 19 Jul 2007 01:46:57 -0700
Subject: mm: fix fault vs invalidate race for linear mappings

Fix the race between invalidate_inode_pages and do_no_page.

Andrea Arcangeli identified a subtle race between invalidation of pages from
pagecache with userspace mappings, and do_no_page.

The issue is that invalidation has to shoot down all mappings to the page,
before it can be discarded from the pagecache.  Between shooting down ptes to
a particular page, and actually dropping the struct page from the pagecache,
do_no_page from any process might fault on that page and establish a new
mapping to the page just before it gets discarded from the pagecache.

The most common case where such invalidation is used is in file truncation.
This case was catered for by doing a sort of open-coded seqlock between the
file's i_size, and its truncate_count.

Truncation will decrease i_size, then increment truncate_count before
unmapping userspace pages; do_no_page will read truncate_count, then find the
page if it is within i_size, and then check truncate_count under the page
table lock and back out and retry if it had subsequently been changed (ptl
will serialise against unmapping, and ensure a potentially updated
truncate_count is actually visible).

Complexity and documentation issues aside, the locking protocol fails in the
case where we would like to invalidate pagecache inside i_size.  do_no_page
can come in anytime and filemap_nopage is not aware of the invalidation in
progress (as it is when it is outside i_size).  The end result is that
dangling (->mapping == NULL) pages that appear to be from a particular file
may be mapped into userspace with nonsense data.  Valid mappings to the same
place will see a different page.

Andrea implemented two working fixes, one using a real seqlock, another using
a page->flags bit.  He also proposed using the page lock in do_no_page, but
that was initially considered too heavyweight.  However, it is not a global or
per-file lock, and the page cacheline is modified in do_no_page to increment
_count and _mapcount anyway, so a further modification should not be a large
performance hit.  Scalability is not an issue.

This patch implements this latter approach.  ->nopage implementations return
with the page locked if it is possible for their underlying file to be
invalidated (in that case, they must set a special vm_flags bit to indicate
so).  do_no_page only unlocks the page after setting up the mapping
completely.  invalidation is excluded because it holds the page lock during
invalidation of each page (and ensures that the page is not mapped while
holding the lock).

This also allows significant simplifications in do_no_page, because we have
the page locked in the right place in the pagecache from the start.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a5c451816fdc..ca9536a348c8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,6 +168,12 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
+#define VM_CAN_INVALIDATE 0x08000000	/* The mapping may be invalidated,
+					 * eg. truncate or invalidate_inode_*.
+					 * In this case, do_no_page must
+					 * return with the page locked.
+					 */
+
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
 #endif
-- 
cgit v1.2.3


From 54cb8821de07f2ffcd28c380ce9b93d5784b40d7 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 19 Jul 2007 01:46:59 -0700
Subject: mm: merge populate and nopage into fault (fixes nonlinear)

Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes
the virtual address -> file offset differently from linear mappings.

->populate is a layering violation because the filesystem/pagecache code
should not need to know anything about the virtual memory mapping.  The hitch here
is that the ->nopage handler didn't pass down enough information (ie.  pgoff).
 But it is more logical to pass pgoff rather than have the ->nopage function
calculate it itself anyway (because that's a similar layering violation).

Having the populate handler install the pte itself is likewise a nasty thing
to be doing.

This patch introduces a new fault handler that replaces ->nopage and
->populate and (later) ->nopfn.  Most of the old mechanism is still in place
so there is a lot of duplication and nice cleanups that can be removed if
everyone switches over.

The rationale for doing this in the first place is that nonlinear mappings are
subject to the pagefault vs invalidate/truncate race too, and it seemed stupid
to duplicate the synchronisation logic rather than just consolidate the two.

After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in
pagecache.  Seems like a fringe functionality anyway.

NOPAGE_REFAULT is removed.  This should be implemented with ->fault, and no
users have hit mainline yet.

[akpm@linux-foundation.org: cleanup]
[randy.dunlap@oracle.com: doc. fixes for readahead]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ca9536a348c8..f28a1b3e63a9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -173,6 +173,7 @@ extern unsigned int kobjsize(const void *objp);
 					 * In this case, do_no_page must
 					 * return with the page locked.
 					 */
+#define VM_CAN_NONLINEAR 0x10000000	/* Has ->fault & does nonlinear pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -196,6 +197,25 @@ extern unsigned int kobjsize(const void *objp);
  */
 extern pgprot_t protection_map[16];
 
+#define FAULT_FLAG_WRITE	0x01
+#define FAULT_FLAG_NONLINEAR	0x02
+
+/*
+ * fault_data is filled in the the pagefault handler and passed to the
+ * vma's ->fault function. That function is responsible for filling in
+ * 'type', which is the type of fault if a page is returned, or the type
+ * of error if NULL is returned.
+ *
+ * pgoff should be used in favour of address, if possible. If pgoff is
+ * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
+ * nonlinear mapping support.
+ */
+struct fault_data {
+	unsigned long address;
+	pgoff_t pgoff;
+	unsigned int flags;
+	int type;
+};
 
 /*
  * These are the virtual MM functions - opening of an area, closing and
@@ -205,9 +225,15 @@ extern pgprot_t protection_map[16];
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
-	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
-	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
-	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+	struct page *(*fault)(struct vm_area_struct *vma,
+			struct fault_data *fdata);
+	struct page *(*nopage)(struct vm_area_struct *area,
+			unsigned long address, int *type);
+	unsigned long (*nopfn)(struct vm_area_struct *area,
+			unsigned long address);
+	int (*populate)(struct vm_area_struct *area, unsigned long address,
+			unsigned long len, pgprot_t prot, unsigned long pgoff,
+			int nonblock);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -661,7 +687,6 @@ static inline int page_mapped(struct page *page)
  */
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
-#define NOPAGE_REFAULT	((struct page *) (-2))	/* Return to userspace, rerun */
 
 /*
  * Error return values for the *_nopfn functions
@@ -1110,9 +1135,11 @@ extern void truncate_inode_pages_range(struct address_space *,
 				       loff_t lstart, loff_t lend);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-extern int filemap_populate(struct vm_area_struct *, unsigned long,
-		unsigned long, pgprot_t, unsigned long, int);
+extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
+extern struct page * __deprecated_for_modules
+filemap_nopage(struct vm_area_struct *, unsigned long, int *);
+extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
+		unsigned long, unsigned long, pgprot_t, unsigned long, int);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
-- 
cgit v1.2.3


From d0217ac04ca6591841e5665f518e38064f4e65bd Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 19 Jul 2007 01:47:03 -0700
Subject: mm: fault feedback #1

Change ->fault prototype.  We now return an int, which contains
VM_FAULT_xxx code in the low byte, and FAULT_RET_xxx code in the next byte.
 FAULT_RET_ code tells the VM whether a page was found, whether it has been
locked, and potentially other things.  This is not quite the way he wanted
it yet, but that's changed in the next patch (which requires changes to
arch code).

This means we no longer set VM_CAN_INVALIDATE in the vma in order to say
that a page is locked which requires filemap_nopage to go away (because we
can no longer remain backward compatible without that flag), but we were
going to do that anyway.

struct fault_data is renamed to struct vm_fault as Linus asked. address
is now a void __user * that we should firmly encourage drivers not to use
without really good reason.

The page is now returned via a page pointer in the vm_fault struct.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 84 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28a1b3e63a9..ff0b8844bd5a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,12 +168,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
-#define VM_CAN_INVALIDATE 0x08000000	/* The mapping may be invalidated,
-					 * eg. truncate or invalidate_inode_*.
-					 * In this case, do_no_page must
-					 * return with the page locked.
-					 */
-#define VM_CAN_NONLINEAR 0x10000000	/* Has ->fault & does nonlinear pages */
+#define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -197,24 +192,44 @@ extern unsigned int kobjsize(const void *objp);
  */
 extern pgprot_t protection_map[16];
 
-#define FAULT_FLAG_WRITE	0x01
-#define FAULT_FLAG_NONLINEAR	0x02
+#define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
+#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
+
+
+#define FAULT_RET_NOPAGE	0x0100	/* ->fault did not return a page. This
+					 * can be used if the handler installs
+					 * their own pte.
+					 */
+#define FAULT_RET_LOCKED	0x0200	/* ->fault locked the page, caller must
+					 * unlock after installing the mapping.
+					 * This is used by pagecache in
+					 * particular, where the page lock is
+					 * used to synchronise against truncate
+					 * and invalidate. Mutually exclusive
+					 * with FAULT_RET_NOPAGE.
+					 */
 
 /*
- * fault_data is filled in the the pagefault handler and passed to the
- * vma's ->fault function. That function is responsible for filling in
- * 'type', which is the type of fault if a page is returned, or the type
- * of error if NULL is returned.
+ * vm_fault is filled by the the pagefault handler and passed to the vma's
+ * ->fault function. The vma's ->fault is responsible for returning the
+ * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed
+ * with FAULT_RET_ flags that occupy the next byte and give details about
+ * how the fault was handled.
  *
- * pgoff should be used in favour of address, if possible. If pgoff is
- * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
- * nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible. If pgoff
+ * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear
+ * mapping support.
  */
-struct fault_data {
-	unsigned long address;
-	pgoff_t pgoff;
-	unsigned int flags;
-	int type;
+struct vm_fault {
+	unsigned int flags;		/* FAULT_FLAG_xxx flags */
+	pgoff_t pgoff;			/* Logical page offset based on vma */
+	void __user *virtual_address;	/* Faulting virtual address */
+
+	struct page *page;		/* ->fault handlers should return a
+					 * page here, unless FAULT_RET_NOPAGE
+					 * is set (which is also implied by
+					 * VM_FAULT_OOM or SIGBUS).
+					 */
 };
 
 /*
@@ -225,15 +240,11 @@ struct fault_data {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
-	struct page *(*fault)(struct vm_area_struct *vma,
-			struct fault_data *fdata);
+	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	struct page *(*nopage)(struct vm_area_struct *area,
 			unsigned long address, int *type);
 	unsigned long (*nopfn)(struct vm_area_struct *area,
 			unsigned long address);
-	int (*populate)(struct vm_area_struct *area, unsigned long address,
-			unsigned long len, pgprot_t prot, unsigned long pgoff,
-			int nonblock);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -700,8 +711,14 @@ static inline int page_mapped(struct page *page)
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
-#define VM_FAULT_OOM	0x00
-#define VM_FAULT_SIGBUS	0x01
+
+/*
+ * VM_FAULT_ERROR is set for the error cases, to make some tests simpler.
+ */
+#define VM_FAULT_ERROR	0x20
+
+#define VM_FAULT_OOM	(0x00 | VM_FAULT_ERROR)
+#define VM_FAULT_SIGBUS	(0x01 | VM_FAULT_ERROR)
 #define VM_FAULT_MINOR	0x02
 #define VM_FAULT_MAJOR	0x03
 
@@ -711,6 +728,11 @@ static inline int page_mapped(struct page *page)
  */
 #define VM_FAULT_WRITE	0x10
 
+/*
+ * Mask of VM_FAULT_ flags
+ */
+#define VM_FAULT_MASK	0xff
+
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 extern void show_free_areas(void);
@@ -793,8 +815,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern int vmtruncate(struct inode * inode, loff_t offset);
 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
-extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
-extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 
 #ifdef CONFIG_MMU
 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
@@ -1135,11 +1155,7 @@ extern void truncate_inode_pages_range(struct address_space *,
 				       loff_t lstart, loff_t lend);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
-extern struct page * __deprecated_for_modules
-filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
-		unsigned long, unsigned long, pgprot_t, unsigned long, int);
+extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
-- 
cgit v1.2.3


From 83c54070ee1a2d05c89793884bea1a03f2851ed4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 19 Jul 2007 01:47:05 -0700
Subject: mm: fault feedback #2

This patch completes Linus's wish that the fault return codes be made into
bit flags, which I agree makes everything nicer.  This requires
all handle_mm_fault callers to be modified (possibly the modifications
should go further and do things like fault accounting in handle_mm_fault --
however that would be for another patch).

[akpm@linux-foundation.org: fix alpha build]
[akpm@linux-foundation.org: fix s390 build]
[akpm@linux-foundation.org: fix sparc build]
[akpm@linux-foundation.org: fix sparc64 build]
[akpm@linux-foundation.org: fix ia64 build]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ian Molton <spyro@f2s.com>
Cc: Bryan Wu <bryan.wu@analog.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: Matthew Wilcox <willy@debian.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Richard Curnow <rc@rc0.org.uk>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp>
Cc: Chris Zankel <chris@zankel.net>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Still apparently needs some ARM and PPC loving - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 58 ++++++++++++------------------------------------------
 1 file changed, 13 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ff0b8844bd5a..f8e12b3b6110 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -196,25 +196,10 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 
 
-#define FAULT_RET_NOPAGE	0x0100	/* ->fault did not return a page. This
-					 * can be used if the handler installs
-					 * their own pte.
-					 */
-#define FAULT_RET_LOCKED	0x0200	/* ->fault locked the page, caller must
-					 * unlock after installing the mapping.
-					 * This is used by pagecache in
-					 * particular, where the page lock is
-					 * used to synchronise against truncate
-					 * and invalidate. Mutually exclusive
-					 * with FAULT_RET_NOPAGE.
-					 */
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
- * ->fault function. The vma's ->fault is responsible for returning the
- * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed
- * with FAULT_RET_ flags that occupy the next byte and give details about
- * how the fault was handled.
+ * ->fault function. The vma's ->fault is responsible for returning a bitmask
+ * of VM_FAULT_xxx flags that give details about how the fault was handled.
  *
  * pgoff should be used in favour of virtual_address, if possible. If pgoff
  * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear
@@ -226,9 +211,9 @@ struct vm_fault {
 	void __user *virtual_address;	/* Faulting virtual address */
 
 	struct page *page;		/* ->fault handlers should return a
-					 * page here, unless FAULT_RET_NOPAGE
+					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
-					 * VM_FAULT_OOM or SIGBUS).
+					 * VM_FAULT_ERROR).
 					 */
 };
 
@@ -712,26 +697,17 @@ static inline int page_mapped(struct page *page)
  * just gets major/minor fault counters bumped up.
  */
 
-/*
- * VM_FAULT_ERROR is set for the error cases, to make some tests simpler.
- */
-#define VM_FAULT_ERROR	0x20
+#define VM_FAULT_MINOR	0 /* For backwards compat. Remove me quickly. */
 
-#define VM_FAULT_OOM	(0x00 | VM_FAULT_ERROR)
-#define VM_FAULT_SIGBUS	(0x01 | VM_FAULT_ERROR)
-#define VM_FAULT_MINOR	0x02
-#define VM_FAULT_MAJOR	0x03
+#define VM_FAULT_OOM	0x0001
+#define VM_FAULT_SIGBUS	0x0002
+#define VM_FAULT_MAJOR	0x0004
+#define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
 
-/* 
- * Special case for get_user_pages.
- * Must be in a distinct bit from the above VM_FAULT_ flags.
- */
-#define VM_FAULT_WRITE	0x10
+#define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
+#define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 
-/*
- * Mask of VM_FAULT_ flags
- */
-#define VM_FAULT_MASK	0xff
+#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS)
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
@@ -817,16 +793,8 @@ extern int vmtruncate(struct inode * inode, loff_t offset);
 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
 
 #ifdef CONFIG_MMU
-extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
+extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
-
-static inline int handle_mm_fault(struct mm_struct *mm,
-			struct vm_area_struct *vma, unsigned long address,
-			int write_access)
-{
-	return __handle_mm_fault(mm, vma, address, write_access) &
-				(~VM_FAULT_WRITE);
-}
 #else
 static inline int handle_mm_fault(struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long address,
-- 
cgit v1.2.3


From bb2d5ce16409efcdf94017a6b6fecd468226e29c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 19 Jul 2007 01:47:23 -0700
Subject: Remove alloc_zeroed_user_highpage()

alloc_zeroed_user_highpage() has no in-tree users and it is not exported.
As it is not exported, it can simply be removed.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 12c5e4e3135a..1fcb0033179e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -102,21 +102,6 @@ __alloc_zeroed_user_highpage(gfp_t movableflags,
 }
 #endif
 
-/**
- * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA
- * @vma: The VMA the page is to be allocated for
- * @vaddr: The virtual address the page will be inserted into
- *
- * This function will allocate a page for a VMA that the caller knows will
- * not be able to move in the future using move_pages() or reclaim. If it
- * is known that the page can move, use alloc_zeroed_user_highpage_movable
- */
-static inline struct page *
-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
-{
-	return __alloc_zeroed_user_highpage(0, vma, vaddr);
-}
-
 /**
  * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
  * @vma: The VMA the page is to be allocated for
-- 
cgit v1.2.3


From a634cc10164d1c229fbeca33923e6a0ed939e894 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 19 Jul 2007 01:47:30 -0700
Subject: swsusp: introduce restore platform operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At least on some machines it is necessary to prepare the ACPI firmware for the
restoration of the system memory state from the hibernation image if the
"platform" mode of hibernation has been used.  Namely, in that case we need
to disable the GPEs before replacing the "boot" kernel with the "frozen"
kernel (cf.  http://bugzilla.kernel.org/show_bug.cgi?id=7887).  After the
restore they will be re-enabled by hibernation_ops->finish(), but if the
restore fails, they have to be re-enabled by the restore code explicitly.

For this purpose we can introduce two additional hibernation operations,
called pre_restore() and restore_cleanup() and call them from the restore code
path.  Still, they should be called if the "platform" mode of hibernation has
been used, so we need to pass the information about the hibernation mode from
the "frozen" kernel to the "boot" kernel in the image header.

Apparently, we can't drop the disabling of GPEs before the restore because of
Bug #7887.  We also can't do it unconditionally, because the GPEs wouldn't
have been enabled after a successful restore if the suspend had been done in
the 'shutdown' or 'reboot' mode.

In principle we could (and probably should) unconditionally disable the GPEs
before each snapshot creation *and* before the restore, but then we'd have to
unconditionally enable them after the snapshot creation as well as after the
restore (or restore failure).  Still, for this purpose we'd need to modify
acpi_enter_sleep_state_prep() and acpi_leave_sleep_state() and we'd have to
introduce some mechanism synchronizing the disabling/enabling of the GPEs with
the device drivers' .suspend()/.resume() routines and with
disable_/enable_nonboot_cpus().   However, this would have affected the
suspend (ie.  s2ram) code as well as the hibernation, which I'd like to avoid
in this patch series.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/suspend.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 9c7cb6430666..d235c146da2b 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -43,11 +43,15 @@ static inline void pm_restore_console(void) {}
  * @prepare: prepare system for hibernation
  * @enter: shut down system after state has been saved to disk
  * @finish: finish/clean up after state has been reloaded
+ * @pre_restore: prepare system for the restoration from a hibernation image
+ * @restore_cleanup: clean up after a failing image restoration
  */
 struct hibernation_ops {
 	int (*prepare)(void);
 	int (*enter)(void);
 	void (*finish)(void);
+	int (*pre_restore)(void);
+	void (*restore_cleanup)(void);
 };
 
 #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
-- 
cgit v1.2.3


From 0c1eecfb345401629aa57c9d3b077273e56c45a7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 19 Jul 2007 01:47:33 -0700
Subject: Freezer: avoid freezing kernel threads prematurely

Kernel threads should not have TIF_FREEZE set when user space processes are
being frozen, since otherwise some of them might be frozen prematurely.
To prevent this from happening we can (1) make exit_mm() unset TIF_FREEZE
unconditionally just after clearing tsk->mm and (2) make try_to_freeze_tasks()
check if p->mm is different from zero and PF_BORROWED_MM is unset in p->flags
when user space processes are to be frozen.

Namely, when user space processes are being frozen, we only should set
TIF_FREEZE for tasks that have p->mm different from NULL and don't have
PF_BORROWED_MM set in p->flags.  For this reason task_lock() must be used to
prevent try_to_freeze_tasks() from racing with use_mm()/unuse_mm(), in which
p->mm and p->flags.PF_BORROWED_MM are changed under task_lock(p).  Also, we
need to prevent the following scenario from happening:

* daemonize() is called by a task spawned from a user space code path
* freezer checks if the task has p->mm set and the result is positive
* task enters exit_mm() and clears its TIF_FREEZE
* freezer sets TIF_FREEZE for the task
* task calls try_to_freeze() and goes to the refrigerator, which is wrong at
  that point

This requires us to acquire task_lock(p) before p->flags.PF_BORROWED_MM and
p->mm are examined and release it after TIF_FREEZE is set for p (or it turns
out that TIF_FREEZE should not be set).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/freezer.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 2d38b1a74662..c8e02de737f6 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -25,7 +25,7 @@ static inline int freezing(struct task_struct *p)
 /*
  * Request that a process be frozen
  */
-static inline void freeze(struct task_struct *p)
+static inline void set_freeze_flag(struct task_struct *p)
 {
 	set_tsk_thread_flag(p, TIF_FREEZE);
 }
@@ -33,7 +33,7 @@ static inline void freeze(struct task_struct *p)
 /*
  * Sometimes we may need to cancel the previous 'freeze' request
  */
-static inline void do_not_freeze(struct task_struct *p)
+static inline void clear_freeze_flag(struct task_struct *p)
 {
 	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
@@ -56,7 +56,7 @@ static inline int thaw_process(struct task_struct *p)
 		wake_up_process(p);
 		return 1;
 	}
-	clear_tsk_thread_flag(p, TIF_FREEZE);
+	clear_freeze_flag(p);
 	task_unlock(p);
 	return 0;
 }
@@ -129,7 +129,8 @@ static inline void set_freezable(void)
 #else
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
-static inline void freeze(struct task_struct *p) { BUG(); }
+static inline void set_freeze_flag(struct task_struct *p) {}
+static inline void clear_freeze_flag(struct task_struct *p) {}
 static inline int thaw_process(struct task_struct *p) { return 1; }
 
 static inline void refrigerator(void) {}
-- 
cgit v1.2.3


From b10d911749d37dccfa5873d2088aea3f074b9e45 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 19 Jul 2007 01:47:36 -0700
Subject: PM: introduce hibernation and suspend notifiers

Make it possible to register hibernation and suspend notifiers, so that
subsystems can perform hibernation-related or suspend-related operations that
should not be carried out by device drivers' .suspend() and .resume()
routines.

[akpm@linux-foundation.org: build fixes]
[akpm@linux-foundation.org: cleanups]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@nigel.suspend2.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/notifier.h |  6 ++++++
 include/linux/suspend.h  | 48 +++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 49 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 576f2bb34cc8..be3f2bb6fcf3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -212,5 +212,11 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
 #define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
 
+/* Hibernation and suspend events */
+#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
+#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
+#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
+#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d235c146da2b..e8e6da394c92 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -54,7 +54,8 @@ struct hibernation_ops {
 	void (*restore_cleanup)(void);
 };
 
-#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_PM
+#ifdef CONFIG_SOFTWARE_SUSPEND
 /* kernel/power/snapshot.c */
 extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
 static inline void register_nosave_region(unsigned long b, unsigned long e)
@@ -72,16 +73,14 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 
 extern void hibernation_set_ops(struct hibernation_ops *ops);
 extern int hibernate(void);
-#else
-static inline void register_nosave_region(unsigned long b, unsigned long e) {}
-static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
+#else /* CONFIG_SOFTWARE_SUSPEND */
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
 static inline void swsusp_unset_page_free(struct page *p) {}
 
 static inline void hibernation_set_ops(struct hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
-#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
+#endif /* CONFIG_SOFTWARE_SUSPEND */
 
 void save_processor_state(void);
 void restore_processor_state(void);
@@ -89,4 +88,43 @@ struct saved_context;
 void __save_processor_state(struct saved_context *ctxt);
 void __restore_processor_state(struct saved_context *ctxt);
 
+/* kernel/power/main.c */
+extern struct blocking_notifier_head pm_chain_head;
+
+static inline int register_pm_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&pm_chain_head, nb);
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&pm_chain_head, nb);
+}
+
+#define pm_notifier(fn, pri) {				\
+	static struct notifier_block fn##_nb =			\
+		{ .notifier_call = fn, .priority = pri };	\
+	register_pm_notifier(&fn##_nb);			\
+}
+#else /* CONFIG_PM */
+
+static inline int register_pm_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+#define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
+#endif /* CONFIG_PM */
+
+#if !defined CONFIG_SOFTWARE_SUSPEND || !defined(CONFIG_PM)
+static inline void register_nosave_region(unsigned long b, unsigned long e)
+{
+}
+#endif
+
 #endif /* _LINUX_SWSUSP_H */
-- 
cgit v1.2.3


From bd804eba1c8597cbb7cd5a5f9fe886aae16a079a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 19 Jul 2007 01:47:40 -0700
Subject: PM: Introduce pm_power_off_prepare

Introduce the pm_power_off_prepare() callback that can be registered by the
interested platforms in analogy with pm_idle() and pm_power_off(), used for
preparing the system to power off (needed by ACPI).

This allows us to drop acpi_sysclass and device_acpi that are only defined in
order to register the ACPI power off preparation callback, which is needed by
pm_power_off() registered in a much different way.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 2735b7cadd20..ad3cc2eb0d34 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -101,6 +101,7 @@ struct pm_dev
  */
 extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
+extern void (*pm_power_off_prepare)(void);
 
 typedef int __bitwise suspend_state_t;
 
-- 
cgit v1.2.3


From 5a60d6235c8352ade8f2699e72fcdfe853730456 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <nigel@nigel.suspend2.net>
Date: Thu, 19 Jul 2007 01:47:41 -0700
Subject: PM: Optional beeping during resume from suspend to RAM

Add a feature allowing the user to make the system beep during a resume from
suspend to RAM, on x86_64 and i386.

This is useful for the users with broken resume from RAM, so that they can
verify if the control reaches the kernel after a wake-up event.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fccd8b548d93..c0ccdd720363 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -123,6 +123,7 @@ extern int pci_mmcfg_config_num;
 
 extern int sbf_port;
 extern unsigned long acpi_video_flags;
+extern unsigned long s2ram_beep;
 
 #else	/* !CONFIG_ACPI */
 
-- 
cgit v1.2.3


From 77afcf78a2ded9a91838734234949c0ead5feb12 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Thu, 19 Jul 2007 01:47:41 -0700
Subject: PM: Integrate beeping flag with existing acpi_sleep flags

Move "debug during resume from s2ram" into the variable we already use
for real-mode flags to simplify code. It also closes a nasty trap for
the user in acpi_sleep_setup; the order of parameters actually mattered there,
acpi_sleep=s3_bios,s3_mode doing something different from
acpi_sleep=s3_mode,s3_bios.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c0ccdd720363..dc234c508a6f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -122,8 +122,7 @@ extern struct acpi_mcfg_allocation *pci_mmcfg_config;
 extern int pci_mmcfg_config_num;
 
 extern int sbf_port;
-extern unsigned long acpi_video_flags;
-extern unsigned long s2ram_beep;
+extern unsigned long acpi_realmode_flags;
 
 #else	/* !CONFIG_ACPI */
 
-- 
cgit v1.2.3


From e53252d97e670a38b1d2e9723b48077bba11ddda Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 19 Jul 2007 01:47:51 -0700
Subject: unregister_chrdev() return void

unregister_chrdev() does not return a meaningful value.  This patch makes it
return void like most unregister_* functions.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9562a59b3703..75dd16efc9b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1463,7 +1463,7 @@ extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
 extern int register_chrdev_region(dev_t, unsigned, const char *);
 extern int register_chrdev(unsigned int, const char *,
 			   const struct file_operations *);
-extern int unregister_chrdev(unsigned int, const char *);
+extern void unregister_chrdev(unsigned int, const char *);
 extern void unregister_chrdev_region(dev_t, unsigned);
 extern int chrdev_open(struct inode *, struct file *);
 extern void chrdev_show(struct seq_file *,off_t);
-- 
cgit v1.2.3


From 2ba2d00363975242dee9bb22cf798b487e3cd61e Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 19 Jul 2007 01:47:55 -0700
Subject: AIO sparse fix (type of ki_flags)

Fix type issue reported by latest 'sparse': kiocb.ki_flags should be
"unsigned long" (not "long"), to match bitop type signature.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/aio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/aio.h b/include/linux/aio.h
index b903fc02bdb7..d10e608f232d 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -86,7 +86,7 @@ struct kioctx;
  */
 struct kiocb {
 	struct list_head	ki_run_list;
-	long			ki_flags;
+	unsigned long		ki_flags;
 	int			ki_users;
 	unsigned		ki_key;		/* id of this request */
 
-- 
cgit v1.2.3


From d77c2d7cc5126639a47d73300b40d461f2811a0f Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:47:55 -0700
Subject: readahead: introduce PG_readahead

Introduce a new page flag: PG_readahead.

It acts as a look-ahead mark, which tells the page reader: Hey, it's time to
invoke the read-ahead logic.  For the sake of I/O pipelining, don't wait until
it runs out of cached pages!

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Steven Pratt <slpratt@austin.ibm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 731cd2ac3227..709d92fd2877 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -83,6 +83,7 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
+#define PG_readahead		13	/* Reminder to do async read-ahead */
 #define PG_compound		14	/* Part of a compound page */
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 
@@ -226,6 +227,10 @@ static inline void SetPageUptodate(struct page *page)
 #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
 #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
 
+#define PageReadahead(page)	test_bit(PG_readahead, &(page)->flags)
+#define SetPageReadahead(page)	set_bit(PG_readahead, &(page)->flags)
+#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags)
+
 #define PageReclaim(page)	test_bit(PG_reclaim, &(page)->flags)
 #define SetPageReclaim(page)	set_bit(PG_reclaim, &(page)->flags)
 #define ClearPageReclaim(page)	clear_bit(PG_reclaim, &(page)->flags)
-- 
cgit v1.2.3


From 5ce1110b92b31d079aa443e967f43a2294e01194 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:47:59 -0700
Subject: readahead: data structure and routines

Extend struct file_ra_state to support the on-demand readahead logic.  Also
define some helpers for it.

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Steven Pratt <slpratt@austin.ibm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75dd16efc9b6..9a5f562abc77 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -695,6 +695,10 @@ struct fown_struct {
 
 /*
  * Track a single file's readahead state
+ *
+ *  ================#============|==================#==================|
+ *                  ^            ^                  ^                  ^
+ *  file_ra_state.la_index    .ra_index   .lookahead_index   .readahead_index
  */
 struct file_ra_state {
 	unsigned long start;		/* Current window */
@@ -704,6 +708,12 @@ struct file_ra_state {
 	unsigned long prev_index;	/* Cache last read() position */
 	unsigned long ahead_start;	/* Ahead window */
 	unsigned long ahead_size;
+
+	pgoff_t la_index;               /* enqueue time */
+	pgoff_t ra_index;               /* begin offset */
+	pgoff_t lookahead_index;        /* time to do next readahead */
+	pgoff_t readahead_index;        /* end offset */
+
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
@@ -712,6 +722,60 @@ struct file_ra_state {
 #define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
 #define RA_FLAG_INCACHE 0x02	/* file is already in cache */
 
+/*
+ * Measuring read-ahead sizes.
+ *
+ *                  |----------- readahead size ------------>|
+ *  ===#============|==================#=====================|
+ *     |------- invoke interval ------>|-- lookahead size -->|
+ */
+static inline unsigned long ra_readahead_size(struct file_ra_state *ra)
+{
+	return ra->readahead_index - ra->ra_index;
+}
+
+static inline unsigned long ra_lookahead_size(struct file_ra_state *ra)
+{
+	return ra->readahead_index - ra->lookahead_index;
+}
+
+static inline unsigned long ra_invoke_interval(struct file_ra_state *ra)
+{
+	return ra->lookahead_index - ra->la_index;
+}
+
+/*
+ * Check if @index falls in the readahead windows.
+ */
+static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+	return (index >= ra->la_index &&
+		index <  ra->readahead_index);
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static inline void ra_set_index(struct file_ra_state *ra,
+				pgoff_t la_index, pgoff_t ra_index)
+{
+	ra->la_index = la_index;
+	ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static inline void ra_set_size(struct file_ra_state *ra,
+				unsigned long ra_size, unsigned long la_size)
+{
+	ra->readahead_index = ra->ra_index + ra_size;
+	ra->lookahead_index = ra->ra_index + ra_size - la_size;
+}
+
+unsigned long ra_submit(struct file_ra_state *ra,
+		       struct address_space *mapping, struct file *filp);
+
 struct file {
 	/*
 	 * fu_list becomes invalid after file_free is called and queued via
-- 
cgit v1.2.3


From 122a21d11cbfda6d1e33cbc8ae9e4c4ee2f1886e Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:48:01 -0700
Subject: readahead: on-demand readahead logic

This is a minimal readahead algorithm that aims to replace the current one.
It is more flexible and reliable, while maintaining almost the same behavior
and performance.  Also it is fully integrated with adaptive readahead.

It is designed to be called on demand:
	- on a missing page, to do synchronous readahead
	- on a lookahead page, to do asynchronous readahead

In this way it eliminates the awkward workarounds for cache hit/miss,
readahead thrashing, retried read, and unaligned read.  It also adopts the
data structure introduced by adaptive readahead, parameterizes readahead
pipelining with `lookahead_index', and reduces the current/ahead windows to
one single window.

HEURISTICS

The logic deals with four cases:

	- sequential-next
		found a consistent readahead window, so push it forward

	- random
		standalone small read, so read as is

	- sequential-first
		create a new readahead window for a sequential/oversize request

	- lookahead-clueless
		hit a lookahead page not associated with the readahead window,
		so create a new readahead window and ramp it up

In each case, three parameters are determined:

	- readahead index: where the next readahead begins
	- readahead size:  how much to readahead
	- lookahead size:  when to do the next readahead (for pipelining)

BEHAVIORS

The old behaviors are maximally preserved for trivial sequential/random reads.
Notable changes are:

	- It no longer imposes strict sequential checks.
	  It might help some interleaved cases, and clustered random reads.
	  It does introduce risks of a random lookahead hit triggering an
	  unexpected readahead. But in general it is more likely to do good
	  than to do evil.

	- Interleaved reads are supported in a minimal way.
	  Their chances of being detected and properly handled are still low.

	- Readahead thrashings are better handled.
	  The current readahead leads to tiny average I/O sizes, because it
	  never turns back for the thrashed pages.  They have to be faulted in
	  by do_generic_mapping_read() one by one, whereas the on-demand
	  readahead will redo readahead for them.

OVERHEADS

The new code reduced the overheads of

	- excessively calling the readahead routine on small sized reads
	  (the current readahead code insists on seeing all requests)

	- doing a lot of pointless page-cache lookups for small cached files
	  (the current readahead only turns itself off after 256 cache hits,
	  unfortunately most files are < 1MB, so never see that chance)

That accounts for speedup of
	- 0.3% on 1-page sequential reads on sparse file
	- 1.2% on 1-page cache hot sequential reads
	- 3.2% on 256-page cache hot sequential reads
	- 1.3% on cache hot `tar /lib`

However, it does introduce one extra page-cache lookup per cache miss, which
impacts random reads slightly. That's a 1% overhead for 1-page random reads on
a sparse file.

PERFORMANCE

The basic benchmark setup is
	- 2.6.20 kernel with on-demand readahead
	- 1MB max readahead size
	- 2.9GHz Intel Core 2 CPU
	- 2GB memory
	- 160G/8M Hitachi SATA II 7200 RPM disk

The benchmarks show that
	- it maintains the same performance for trivial sequential/random reads
	- sysbench/OLTP performance on MySQL gains up to 8%
	- performance on readahead thrashing gains up to 3 times

iozone throughput (KB/s): roughly the same
==========================================
iozone -c -t1 -s 4096m -r 64k

			       2.6.20          on-demand      gain
first run
	  "  Initial write "   61437.27        64521.53      +5.0%
	  "        Rewrite "   47893.02        48335.20      +0.9%
	  "           Read "   62111.84        62141.49      +0.0%
	  "        Re-read "   62242.66        62193.17      -0.1%
	  "   Reverse Read "   50031.46        49989.79      -0.1%
	  "    Stride read "    8657.61         8652.81      -0.1%
	  "    Random read "   13914.28        13898.23      -0.1%
	  " Mixed workload "   19069.27        19033.32      -0.2%
	  "   Random write "   14849.80        14104.38      -5.0%
	  "         Pwrite "   62955.30        65701.57      +4.4%
	  "          Pread "   62209.99        62256.26      +0.1%

second run
	  "  Initial write "   60810.31        66258.69      +9.0%
	  "        Rewrite "   49373.89        57833.66     +17.1%
	  "           Read "   62059.39        62251.28      +0.3%
	  "        Re-read "   62264.32        62256.82      -0.0%
	  "   Reverse Read "   49970.96        50565.72      +1.2%
	  "    Stride read "    8654.81         8638.45      -0.2%
	  "    Random read "   13901.44        13949.91      +0.3%
	  " Mixed workload "   19041.32        19092.04      +0.3%
	  "   Random write "   14019.99        14161.72      +1.0%
	  "         Pwrite "   64121.67        68224.17      +6.4%
	  "          Pread "   62225.08        62274.28      +0.1%

In summary, writes are unstable, reads are pretty close on average:

			  access pattern  2.6.20  on-demand   gain
				   Read  62085.61  62196.38  +0.2%
				Re-read  62253.49  62224.99  -0.0%
			   Reverse Read  50001.21  50277.75  +0.6%
			    Stride read   8656.21   8645.63  -0.1%
			    Random read  13907.86  13924.07  +0.1%
	 		 Mixed workload  19055.29  19062.68  +0.0%
				  Pread  62217.53  62265.27  +0.1%

aio-stress: roughly the same
============================
aio-stress -l -s4096 -r128 -t1 -o1 knoppix511-dvd-cn.iso
aio-stress -l -s4096 -r128 -t1 -o3 knoppix511-dvd-cn.iso

					2.6.20      on-demand  delta
			sequential	 92.57s      92.54s    -0.0%
			random		311.87s     312.15s    +0.1%

sysbench fileio: roughly the same
=================================
sysbench --test=fileio --file-io-mode=async --file-test-mode=rndrw \
	 --file-total-size=4G --file-block-size=64K \
	 --num-threads=001 --max-requests=10000 --max-time=900 run

				threads    2.6.20   on-demand    delta
		first run
				      1   59.1974s    59.2262s  +0.0%
				      2   58.0575s    58.2269s  +0.3%
				      4   48.0545s    47.1164s  -2.0%
				      8   41.0684s    41.2229s  +0.4%
				     16   35.8817s    36.4448s  +1.6%
				     32   32.6614s    32.8240s  +0.5%
				     64   23.7601s    24.1481s  +1.6%
				    128   24.3719s    23.8225s  -2.3%
				    256   23.2366s    22.0488s  -5.1%

		second run
				      1   59.6720s    59.5671s  -0.2%
				      8   41.5158s    41.9541s  +1.1%
				     64   25.0200s    23.9634s  -4.2%
				    256   22.5491s    20.9486s  -7.1%

Note that the numbers are not very stable because of the writes.
The overall performance is close when we sum all seconds up:

                sum all up               495.046s    491.514s   -0.7%

sysbench oltp (trans/sec): up to 8% gain
========================================
sysbench --test=oltp --oltp-table-size=10000000 --oltp-read-only \
	 --mysql-socket=/var/run/mysqld/mysqld.sock \
	 --mysql-user=root --mysql-password=readahead \
	 --num-threads=064 --max-requests=10000 --max-time=900 run

	10000-transactions run
				threads    2.6.20   on-demand    gain
				      1     62.81       64.56   +2.8%
				      2     67.97       70.93   +4.4%
				      4     81.81       85.87   +5.0%
				      8     94.60       97.89   +3.5%
				     16     99.07      104.68   +5.7%
				     32     95.93      104.28   +8.7%
				     64     96.48      103.68   +7.5%
	5000-transactions run
				      1     48.21       48.65   +0.9%
				      8     68.60       70.19   +2.3%
				     64     70.57       74.72   +5.9%
	2000-transactions run
				      1     37.57       38.04   +1.3%
				      2     38.43       38.99   +1.5%
				      4     45.39       46.45   +2.3%
				      8     51.64       52.36   +1.4%
				     16     54.39       55.18   +1.5%
				     32     52.13       54.49   +4.5%
				     64     54.13       54.61   +0.9%

Those are interesting results. Some investigations show that
	- MySQL is accessing the db file non-uniformly: some parts are
	  more hot than others
	- It is mostly doing 4-page random reads, and sometimes doing two
	  reads in a row, the latter one triggers a 16-page readahead.
	- The on-demand readahead leaves many lookahead pages (flagged
	  PG_readahead) there. Many of them will be hit, and trigger
	  more readahead pages. Which might save more seeks.
	- Naturally, the readahead windows tend to lie in hot areas,
	  and the lookahead pages in hot areas are more likely to be hit.
	- The higher the overall read density, the greater the possible gain.

That also explains the adaptive readahead tricks for clustered random reads.

readahead thrashing: 3 times better
===================================
We boot kernel with "mem=128m single", and start a 100KB/s stream on every
second, until reaching 200 streams.

			      max throughput     min avg I/O size
		2.6.20:            5MB/s               16KB
		on-demand:        15MB/s              140KB

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Steven Pratt <slpratt@austin.ibm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8e12b3b6110..619c0e80cf0c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1138,6 +1138,12 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
+unsigned long page_cache_readahead_ondemand(struct address_space *mapping,
+			  struct file_ra_state *ra,
+			  struct file *filp,
+			  struct page *page,
+			  pgoff_t offset,
+			  unsigned long size);
 unsigned long page_cache_readahead(struct address_space *mapping,
 			  struct file_ra_state *ra,
 			  struct file *filp,
-- 
cgit v1.2.3


From c743d96b6d2ff55a94df7b5ac7c74987bb9c343b Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:48:04 -0700
Subject: readahead: remove the old algorithm

Remove the old readahead algorithm.

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Steven Pratt <slpratt@austin.ibm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 11 +----------
 include/linux/mm.h |  7 -------
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a5f562abc77..29cb32d3a849 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -701,14 +701,6 @@ struct fown_struct {
  *  file_ra_state.la_index    .ra_index   .lookahead_index   .readahead_index
  */
 struct file_ra_state {
-	unsigned long start;		/* Current window */
-	unsigned long size;
-	unsigned long flags;		/* ra flags RA_FLAG_xxx*/
-	unsigned long cache_hit;	/* cache hit count*/
-	unsigned long prev_index;	/* Cache last read() position */
-	unsigned long ahead_start;	/* Ahead window */
-	unsigned long ahead_size;
-
 	pgoff_t la_index;               /* enqueue time */
 	pgoff_t ra_index;               /* begin offset */
 	pgoff_t lookahead_index;        /* time to do next readahead */
@@ -717,10 +709,9 @@ struct file_ra_state {
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
+	unsigned long prev_index;	/* Cache last read() position */
 	unsigned int prev_offset;	/* Offset where last read() ended in a page */
 };
-#define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
-#define RA_FLAG_INCACHE 0x02	/* file is already in cache */
 
 /*
  * Measuring read-ahead sizes.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 619c0e80cf0c..3d0d7d285237 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1144,13 +1144,6 @@ unsigned long page_cache_readahead_ondemand(struct address_space *mapping,
 			  struct page *page,
 			  pgoff_t offset,
 			  unsigned long size);
-unsigned long page_cache_readahead(struct address_space *mapping,
-			  struct file_ra_state *ra,
-			  struct file *filp,
-			  pgoff_t offset,
-			  unsigned long size);
-void handle_ra_miss(struct address_space *mapping, 
-		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
 
 /* Do stack extension */
-- 
cgit v1.2.3


From fe3cba17c49471e99d3421e675fc8b3deaaf0b70 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:48:07 -0700
Subject: mm: share PG_readahead and PG_reclaim

Share the same page flag bit for PG_readahead and PG_reclaim.

One is used only on file reads, the other is only for emergency writes.  One
is used mostly for fresh/young pages, the other is for old pages.

Combinations of possible interactions are:

a) clear PG_reclaim => implicit clear of PG_readahead
	it will delay an asynchronous readahead into a synchronous one
	it actually does _good_ for readahead:
		the pages will be reclaimed soon, it's readahead thrashing!
		in this case, synchronous readahead makes more sense.

b) clear PG_readahead => implicit clear of PG_reclaim
	one(and only one) page will not be reclaimed in time
	it can be avoided by checking PageWriteback(page) in readahead first

c) set PG_reclaim => implicit set of PG_readahead
	will confuse readahead and make it restart the size rampup process
	it's a trivial problem, and can mostly be avoided by checking
	PageWriteback(page) first in readahead

d) set PG_readahead => implicit set of PG_reclaim
	PG_readahead will never be set on already cached pages.
	PG_reclaim will always be cleared on dirtying a page.
	so not a problem.

In summary,
	a)   we get better behavior
	b,d) possible interactions can be avoided
	c)   racy condition exists that might affect readahead, but the chance
	     is _really_ low, and the hurt on readahead is trivial.

Compound pages also use PG_reclaim, but for now they do not interact with
reclaim/readahead code.

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 709d92fd2877..a454176c3e30 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -83,7 +83,6 @@
 #define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
-#define PG_readahead		13	/* Reminder to do async read-ahead */
 #define PG_compound		14	/* Part of a compound page */
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 
@@ -91,6 +90,9 @@
 #define PG_reclaim		17	/* To be reclaimed asap */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
+/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+#define PG_readahead		PG_reclaim /* Reminder to do async read-ahead */
+
 /* PG_owner_priv_1 users should have descriptive aliases */
 #define PG_checked		PG_owner_priv_1 /* Used by some filesystems */
 #define PG_pinned		PG_owner_priv_1	/* Xen pinned pagetable */
-- 
cgit v1.2.3


From cf914a7d656e62b9dd3e0dffe4f62b953ae6048d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 19 Jul 2007 01:48:08 -0700
Subject: readahead: split ondemand readahead interface into two functions

Split ondemand readahead interface into two functions.  I think this makes it
a little clearer for non-readahead experts (like Rusty).

Internally they both call ondemand_readahead(), but the page argument is
changed to an obvious boolean flag.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d0d7d285237..50a0ed1d1806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1138,12 +1138,20 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
-unsigned long page_cache_readahead_ondemand(struct address_space *mapping,
-			  struct file_ra_state *ra,
-			  struct file *filp,
-			  struct page *page,
-			  pgoff_t offset,
-			  unsigned long size);
+
+void page_cache_sync_readahead(struct address_space *mapping,
+			       struct file_ra_state *ra,
+			       struct file *filp,
+			       pgoff_t offset,
+			       unsigned long size);
+
+void page_cache_async_readahead(struct address_space *mapping,
+				struct file_ra_state *ra,
+				struct file *filp,
+				struct page *pg,
+				pgoff_t offset,
+				unsigned long size);
+
 unsigned long max_sane_readahead(unsigned long nr);
 
 /* Do stack extension */
-- 
cgit v1.2.3


From f9acc8c7b35a100f3a9e0e6977f7807b0169f9a5 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
Date: Thu, 19 Jul 2007 01:48:08 -0700
Subject: readahead: sanify file_ra_state names

Rename some file_ra_state variables and remove some accessors.

It results in much simpler code.
Kudos to Rusty!

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 61 ++++++------------------------------------------------
 1 file changed, 6 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29cb32d3a849..d33beadd9a43 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -695,16 +695,12 @@ struct fown_struct {
 
 /*
  * Track a single file's readahead state
- *
- *  ================#============|==================#==================|
- *                  ^            ^                  ^                  ^
- *  file_ra_state.la_index    .ra_index   .lookahead_index   .readahead_index
  */
 struct file_ra_state {
-	pgoff_t la_index;               /* enqueue time */
-	pgoff_t ra_index;               /* begin offset */
-	pgoff_t lookahead_index;        /* time to do next readahead */
-	pgoff_t readahead_index;        /* end offset */
+	pgoff_t start;                  /* where readahead started */
+	unsigned long size;             /* # of readahead pages */
+	unsigned long async_size;       /* do asynchronous readahead when
+					   there are only # of pages ahead */
 
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
@@ -713,60 +709,15 @@ struct file_ra_state {
 	unsigned int prev_offset;	/* Offset where last read() ended in a page */
 };
 
-/*
- * Measuring read-ahead sizes.
- *
- *                  |----------- readahead size ------------>|
- *  ===#============|==================#=====================|
- *     |------- invoke interval ------>|-- lookahead size -->|
- */
-static inline unsigned long ra_readahead_size(struct file_ra_state *ra)
-{
-	return ra->readahead_index - ra->ra_index;
-}
-
-static inline unsigned long ra_lookahead_size(struct file_ra_state *ra)
-{
-	return ra->readahead_index - ra->lookahead_index;
-}
-
-static inline unsigned long ra_invoke_interval(struct file_ra_state *ra)
-{
-	return ra->lookahead_index - ra->la_index;
-}
-
 /*
  * Check if @index falls in the readahead windows.
  */
 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
 {
-	return (index >= ra->la_index &&
-		index <  ra->readahead_index);
-}
-
-/*
- * Where is the old read-ahead and look-ahead?
- */
-static inline void ra_set_index(struct file_ra_state *ra,
-				pgoff_t la_index, pgoff_t ra_index)
-{
-	ra->la_index = la_index;
-	ra->ra_index = ra_index;
+	return (index >= ra->start &&
+		index <  ra->start + ra->size);
 }
 
-/*
- * Where is the new read-ahead and look-ahead?
- */
-static inline void ra_set_size(struct file_ra_state *ra,
-				unsigned long ra_size, unsigned long la_size)
-{
-	ra->readahead_index = ra->ra_index + ra_size;
-	ra->lookahead_index = ra->ra_index + ra_size - la_size;
-}
-
-unsigned long ra_submit(struct file_ra_state *ra,
-		       struct address_space *mapping, struct file *filp);
-
 struct file {
 	/*
 	 * fu_list becomes invalid after file_free is called and queued via
-- 
cgit v1.2.3


From 81eae375eceba481ca4c605d42913871f093f6d5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 19 Jul 2007 01:48:09 -0700
Subject: jprobes: make struct jprobe.entry a void *

Currently jprobe.entry is a kprobe_opcode_t *, but that's a lie.  On some
platforms it doesn't point to an opcode at all, it points to a function
descriptor.

It's really a pointer to something that the arch code can turn into a function
entry point.  And that's what actually happens, none of the generic code ever
looks at jprobe.entry, it's only ever dereferenced by arch code.

So just make it a void *.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Cc: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23adf6075ae4..f4e53b71d23f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -116,7 +116,7 @@ struct kprobe {
  */
 struct jprobe {
 	struct kprobe kp;
-	kprobe_opcode_t *entry;	/* probe handling code to jump to */
+	void *entry;	/* probe handling code to jump to */
 };
 
 DECLARE_PER_CPU(struct kprobe *, current_kprobe);
-- 
cgit v1.2.3


From 9e367d859297b9377d65574f538cf52730e9eda8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 19 Jul 2007 01:48:10 -0700
Subject: jprobes: remove JPROBE_ENTRY()

AFAICT now that jprobe.entry is a void *, JPROBE_ENTRY doesn't do anything
useful - so remove it ..

I've left a do-nothing version so that out-of-tree jprobes code will still
compile without modifications.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Cc: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kprobes.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index f4e53b71d23f..bd892850c94a 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -119,6 +119,9 @@ struct jprobe {
 	void *entry;	/* probe handling code to jump to */
 };
 
+/* For backward compatibility with old code using JPROBE_ENTRY() */
+#define JPROBE_ENTRY(handler)	(handler)
+
 DECLARE_PER_CPU(struct kprobe *, current_kprobe);
 DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-- 
cgit v1.2.3


From 3d7e33825d8799115dd2495c9944badd3272a623 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 19 Jul 2007 01:48:11 -0700
Subject: jprobes: make jprobes a little safer for users

I realise jprobes are a razor-blades-included type of interface, but that
doesn't mean we can't try and make them safer to use.  This guy I know once
wrote code like this:

struct jprobe jp = { .kp.symbol_name = "foo", .entry = "jprobe_foo" };

And then his kernel exploded. Oops.

This patch adds an arch hook, arch_deref_entry_point() (I don't like it
either) which takes the void * in a struct jprobe, and gives back the text
address that it represents.

We can then use that in register_jprobe() to check that the entry point we're
passed is actually in the kernel text, rather than just some random value.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Cc: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kprobes.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index bd892850c94a..51464d12a4e5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -214,6 +214,7 @@ int longjmp_break_handler(struct kprobe *, struct pt_regs *);
 int register_jprobe(struct jprobe *p);
 void unregister_jprobe(struct jprobe *p);
 void jprobe_return(void);
+unsigned long arch_deref_entry_point(void *);
 
 int register_kretprobe(struct kretprobe *rp);
 void unregister_kretprobe(struct kretprobe *rp);
-- 
cgit v1.2.3


From bdf4c48af20a3b0f01671799ace345e3d49576da Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Jul 2007 01:48:15 -0700
Subject: audit: rework execve audit

The purpose of audit_bprm() is to log the argv array to a userspace daemon at
the end of the execve system call.  Since user-space hasn't had time to run,
this array is still in pristine state on the process' stack; so no need to
copy it, we can just grab it from there.

In order to minimize the damage to audit_log_*() copy each string into a
temporary kernel buffer first.

Currently the audit code requires that the full argument vector fits in a
single packet.  So currently it does clip the argv size to a (sysctl) limit,
but only when execve auditing is enabled.

If the audit protocol gets extended to allow for multiple packets this check
can be removed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ollie Wild <aaw@google.com>
Cc: <linux-audit@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e1a708337be3..a0b209cd5761 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -40,6 +40,7 @@ struct linux_binprm{
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+	unsigned long argv_len;
 };
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
-- 
cgit v1.2.3


From b6a2fea39318e43fee84fa7b0b90d68bed92d2ba Mon Sep 17 00:00:00 2001
From: Ollie Wild <aaw@google.com>
Date: Thu, 19 Jul 2007 01:48:16 -0700
Subject: mm: variable length argument support

Remove the arg+env limit of MAX_ARG_PAGES by copying the strings directly from
the old mm into the new mm.

We create the new mm before the binfmt code runs, and place the new stack at
the very top of the address space.  Once the binfmt code runs and figures out
where the stack should be, we move it downwards.

It is a bit peculiar in that we have one task with two mm's, one of which is
inactive.

[a.p.zijlstra@chello.nl: limit stack size]
Signed-off-by: Ollie Wild <aaw@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <linux-arch@vger.kernel.org>
Cc: Hugh Dickins <hugh@veritas.com>
[bunk@stusta.de: unexport bprm_mm_init]
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h | 18 +++++++++++++-----
 include/linux/mm.h      |  9 ++++++++-
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a0b209cd5761..91c8c07fe8b7 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -6,11 +6,13 @@
 struct pt_regs;
 
 /*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
+ * These are the maximum length and maximum number of strings passed to the
+ * execve() system call.  MAX_ARG_STRLEN is essentially random but serves to
+ * prevent the kernel from being unduly impacted by misaddressed pointers.
+ * MAX_ARG_STRINGS is chosen to fit in a signed 32-bit integer.
  */
-#define MAX_ARG_PAGES 32
+#define MAX_ARG_STRLEN (PAGE_SIZE * 32)
+#define MAX_ARG_STRINGS 0x7FFFFFFF
 
 /* sizeof(linux_binprm->buf) */
 #define BINPRM_BUF_SIZE 128
@@ -24,7 +26,12 @@ struct pt_regs;
  */
 struct linux_binprm{
 	char buf[BINPRM_BUF_SIZE];
+#ifdef CONFIG_MMU
+	struct vm_area_struct *vma;
+#else
+# define MAX_ARG_PAGES	32
 	struct page *page[MAX_ARG_PAGES];
+#endif
 	struct mm_struct *mm;
 	unsigned long p; /* current top of mem */
 	int sh_bang;
@@ -69,7 +76,7 @@ extern int register_binfmt(struct linux_binfmt *);
 extern int unregister_binfmt(struct linux_binfmt *);
 
 extern int prepare_binprm(struct linux_binprm *);
-extern void remove_arg_zero(struct linux_binprm *);
+extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 
@@ -86,6 +93,7 @@ extern int suid_dumpable;
 extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
+extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern void compute_creds(struct linux_binprm *binprm);
 extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 50a0ed1d1806..c456c3a1c28e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -808,7 +808,6 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
@@ -825,9 +824,15 @@ int FASTCALL(set_page_dirty(struct page *page));
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
+extern unsigned long move_page_tables(struct vm_area_struct *vma,
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len);
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern int mprotect_fixup(struct vm_area_struct *vma,
+			  struct vm_area_struct **pprev, unsigned long start,
+			  unsigned long end, unsigned long newflags);
 
 /*
  * A callback you can register to apply pressure to ageable caches.
@@ -1159,6 +1164,8 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 #ifdef CONFIG_IA64
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #endif
+extern int expand_stack_downwards(struct vm_area_struct *vma,
+				  unsigned long address);
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
-- 
cgit v1.2.3


From 16f1820028d660d9da9c03b2ae7e98253c11795b Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Date: Thu, 19 Jul 2007 01:48:18 -0700
Subject: fs: introduce vfs_path_lookup

Stackable file systems, among others, frequently need to lookup paths or
path components starting from an arbitrary point in the namespace
(identified by a dentry and a vfsmount).  Currently, such file systems use
lookup_one_len, which is frowned upon [1] as it does not pass the lookup
intent along; not passing a lookup intent, for example, can trigger BUG_ON's
when stacking on top of NFSv4.

The first patch introduces a new lookup function to allow lookup starting
from an arbitrary point in the namespace.  This approach has been suggested
by Christoph Hellwig [2].

The second patch changes sunrpc to use vfs_path_lookup.

The third patch changes nfsctl.c to use vfs_path_lookup.

The fourth patch marks link_path_walk static.

The fifth, and last patch, unexports path_walk because it is no longer
necessary to call it directly, and using the new vfs_path_lookup is
cleaner.

For example, the following snippet of code, looks up "some/path/component"
in a directory pointed to by parent_{dentry,vfsmnt}:

err = vfs_path_lookup(parent_dentry, parent_vfsmnt,
		      "some/path/component", 0, &nd);
if (!err) {
	/* exists */

	...

	/* once done, release the references */
	path_release(&nd);
} else if (err == -ENOENT) {
	/* doesn't exist */
} else {
	/* other error */
}

VFS functions such as lookup_create can be used on the nameidata structure
to pass the create intent to the file system.

Signed-off-by: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/namei.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index b7dd24917f0d..2e21af0989d9 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -69,6 +69,8 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struc
 #define user_path_walk_link(name,nd) \
 	__user_walk_fd(AT_FDCWD, name, 0, nd)
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+			   const char *, unsigned int, struct nameidata *);
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
-- 
cgit v1.2.3


From c4a7808fc3d7a346d5d12e0d69d76d66d821488b Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Date: Thu, 19 Jul 2007 01:48:22 -0700
Subject: fs: mark link_path_walk static

Signed-off-by: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/namei.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 2e21af0989d9..18ea81265068 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,7 +72,6 @@ extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
-extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
-- 
cgit v1.2.3


From f79c20f52532d38fd0aee7ef64e138cc1613c484 Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Date: Thu, 19 Jul 2007 01:48:22 -0700
Subject: fs: remove path_walk export

Signed-off-by: Josef 'Jeff' Sipek <jsipek@cs.sunysb.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/namei.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 18ea81265068..6c38efbd810f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -71,7 +71,6 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struc
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
-extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
-- 
cgit v1.2.3


From 6c5d523826dc639df709ed0f88c5d2ce25379652 Mon Sep 17 00:00:00 2001
From: "Kawai, Hidehiro" <hidehiro.kawai.ez@hitachi.com>
Date: Thu, 19 Jul 2007 01:48:27 -0700
Subject: coredump masking: reimplementation of dumpable using two flags

This patch changes mm_struct.dumpable to a pair of bit flags.

set_dumpable() converts three-value dumpable to two flags and stores it into
lower two bits of mm_struct.flags instead of mm_struct.dumpable.
get_dumpable() behaves in the opposite way.

[akpm@linux-foundation.org: export set_dumpable]
Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 731edaca8ffd..8dbd08366400 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -345,6 +345,13 @@ typedef unsigned long mm_counter_t;
 		(mm)->hiwater_vm = (mm)->total_vm;	\
 } while (0)
 
+extern void set_dumpable(struct mm_struct *mm, int value);
+extern int get_dumpable(struct mm_struct *mm);
+
+/* mm flags */
+#define MMF_DUMPABLE      0  /* core dump is permitted */
+#define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
+
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -402,7 +409,7 @@ struct mm_struct {
 	unsigned int token_priority;
 	unsigned int last_interval;
 
-	unsigned char dumpable:2;
+	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	/* coredumping support */
 	int core_waiters;
-- 
cgit v1.2.3


From 3cb4a0bb1e773e3c41800b33a3f7dab32bd06c64 Mon Sep 17 00:00:00 2001
From: "Kawai, Hidehiro" <hidehiro.kawai.ez@hitachi.com>
Date: Thu, 19 Jul 2007 01:48:28 -0700
Subject: coredump masking: add an interface for core dump filter

This patch adds an interface to set/reset flags which determine whether each
memory segment should be dumped or not when a core file is generated.

/proc/<pid>/coredump_filter file is provided to access the flags.  You can
inspect or change the flag status for a particular process by reading from
or writing to the file.

The flag status is inherited by the child process when it is created.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8dbd08366400..94f624aef017 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -349,8 +349,22 @@ extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
 /* mm flags */
+/* dumpable bits */
 #define MMF_DUMPABLE      0  /* core dump is permitted */
 #define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
+#define MMF_DUMPABLE_BITS 2
+
+/* coredump filter bits */
+#define MMF_DUMP_ANON_PRIVATE	2
+#define MMF_DUMP_ANON_SHARED	3
+#define MMF_DUMP_MAPPED_PRIVATE	4
+#define MMF_DUMP_MAPPED_SHARED	5
+#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
+#define MMF_DUMP_FILTER_BITS	4
+#define MMF_DUMP_FILTER_MASK \
+	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
+#define MMF_DUMP_FILTER_DEFAULT \
+	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED))
 
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
-- 
cgit v1.2.3


From d9664c95afe5baa92ea56eff6a1c18e7b7a2cbe7 Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Thu, 19 Jul 2007 01:48:46 -0700
Subject: coda: block signals during upcall processing

We ignore signals for about 30 seconds to give userspace a chance to see the
upcall.  As we did not block signals we ended up in a busy loop for the
remainder of the period when a signal is received.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coda_psdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index b541bb3d1f4b..f28c2f7fd454 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -85,7 +85,6 @@ struct upc_req {
 	u_short	            uc_opcode;  /* copied from data to save lookup */
 	int		    uc_unique;
 	wait_queue_head_t   uc_sleep;   /* process' wait queue */
-	unsigned long       uc_posttime;
 };
 
 #define REQ_ASYNC  0x1
-- 
cgit v1.2.3


From a1b0aa87647493c0201821ab884e86298d5da7d6 Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Thu, 19 Jul 2007 01:48:50 -0700
Subject: coda: remove struct coda_sb_info

The sb_info structure only contains a single pointer to the character device,
there is no need for the added indirection.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coda_psdev.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index f28c2f7fd454..81b2e4c7d7ce 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -8,11 +8,6 @@
 
 struct kstatfs;
 
-struct coda_sb_info
-{
-	struct venus_comm *sbi_vcomm;
-};
-
 /* communication pending/processing queues */
 struct venus_comm {
 	u_long		    vc_seq;
@@ -24,9 +19,9 @@ struct venus_comm {
 };
 
 
-static inline struct coda_sb_info *coda_sbp(struct super_block *sb)
+static inline struct venus_comm *coda_vcp(struct super_block *sb)
 {
-    return ((struct coda_sb_info *)((sb)->s_fs_info));
+	return (struct venus_comm *)((sb)->s_fs_info);
 }
 
 
-- 
cgit v1.2.3


From 3cf01f28c303be34f18cb4f6204cf1bdfe12ba7c Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Thu, 19 Jul 2007 01:48:51 -0700
Subject: coda: remove statistics counters from /proc/fs/coda

Similar information can easily be obtained with strace -c.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coda_linux.h |  3 --
 include/linux/coda_proc.h  | 76 ----------------------------------------------
 include/linux/coda_psdev.h |  2 --
 3 files changed, 81 deletions(-)
 delete mode 100644 include/linux/coda_proc.h

(limited to 'include/linux')

diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index e4ac016ad272..c4079b403e9e 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -43,9 +43,6 @@ int coda_revalidate_inode(struct dentry *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
 
-/* global variables */
-extern int coda_fake_statfs;
-
 /* this file:  heloers */
 static __inline__ struct CodaFid *coda_i2f(struct inode *);
 static __inline__ char *coda_i2s(struct inode *);
diff --git a/include/linux/coda_proc.h b/include/linux/coda_proc.h
deleted file mode 100644
index 0dc1b0458e75..000000000000
--- a/include/linux/coda_proc.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * coda_statis.h
- * 
- * CODA operation statistics
- *
- * (c) March, 1998
- * by Michihiro Kuramochi, Zhenyu Xia and Zhanyong Wan
- * zhanyong.wan@yale.edu
- *
- */
-
-#ifndef _CODA_PROC_H
-#define _CODA_PROC_H
-
-void coda_sysctl_init(void);
-void coda_sysctl_clean(void);
-
-#include <linux/sysctl.h>
-#include <linux/coda_fs_i.h>
-#include <linux/coda.h>
-
-/* these four files are presented to show the result of the statistics:
- *
- *	/proc/fs/coda/vfs_stats
- *		      cache_inv_stats
- *
- * these four files are presented to reset the statistics to 0:
- *
- *	/proc/sys/coda/vfs_stats
- *		       cache_inv_stats
- */
-
-/* VFS operation statistics */
-struct coda_vfs_stats 
-{
-	/* file operations */
-	int open;
-	int flush;
-	int release;
-	int fsync;
-
-	/* dir operations */
-	int readdir;
-  
-	/* inode operations */
-	int create;
-	int lookup;
-	int link;
-	int unlink;
-	int symlink;
-	int mkdir;
-	int rmdir;
-	int rename;
-	int permission;
-
-	/* symlink operatoins*/
-	int follow_link;
-	int readlink;
-};
-
-/* cache invalidation statistics */
-struct coda_cache_inv_stats
-{
-	int flush;
-	int purge_user;
-	int zap_dir;
-	int zap_file;
-	int zap_vnode;
-	int purge_fid;
-	int replace;
-};
-
-/* these global variables hold the actual statistics data */
-extern struct coda_vfs_stats		coda_vfs_stat;
-
-#endif /* _CODA_PROC_H */
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 81b2e4c7d7ce..aa8f454b3b77 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -69,8 +69,6 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs);
 
 
 /* messages between coda filesystem in kernel and Venus */
-extern int coda_hard;
-extern unsigned long coda_timeout;
 struct upc_req {
 	struct list_head    uc_chain;
 	caddr_t	            uc_data;
-- 
cgit v1.2.3


From 21f8ca3bf6198bd21e3c4cc820af2ccf753a6ec8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Jul 2007 01:48:53 -0700
Subject: fix raw_spinlock_t vs lockdep

Use the lockdep infrastructure to track lock contention and other lock
statistics.

It tracks lock contention events, and the first four unique call-sites that
encountered contention.

It also measures lock wait-time and hold-time in nanoseconds. The minimum and
maximum times are tracked, as well as a total (which together with the number
of events can give the avg).

All statistics are done per lock class, per write (exclusive state) and per read
(shared state).

The statistics are collected per-cpu, so that the collection overhead is
minimized via having no global cachemisses.

This new lock statistics feature is independent of the lock dependency checking
traditionally done by lockdep; it just shares the lock tracking code. It is
also possible to enable both and disable either component at runtime - thereby
avoiding the O(n^2) lock chain walks for instance.

This patch:

raw_spinlock_t should not use lockdep (and doesn't) since lockdep itself
relies on it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spinlock_types.h    | 4 ++--
 include/linux/spinlock_types_up.h | 9 +--------
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 210549ba4ef4..f6a3a951b79e 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,14 +9,14 @@
  * Released under the General Public License (GPL).
  */
 
-#include <linux/lockdep.h>
-
 #if defined(CONFIG_SMP)
 # include <asm/spinlock_types.h>
 #else
 # include <linux/spinlock_types_up.h>
 #endif
 
+#include <linux/lockdep.h>
+
 typedef struct {
 	raw_spinlock_t raw_lock;
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 27644af20b7c..04135b0e198e 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -12,14 +12,10 @@
  * Released under the General Public License (GPL).
  */
 
-#if defined(CONFIG_DEBUG_SPINLOCK) || \
-	defined(CONFIG_DEBUG_LOCK_ALLOC)
+#ifdef CONFIG_DEBUG_SPINLOCK
 
 typedef struct {
 	volatile unsigned int slock;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { 1 }
@@ -34,9 +30,6 @@ typedef struct { } raw_spinlock_t;
 
 typedef struct {
 	/* no debug version on UP */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED { }
-- 
cgit v1.2.3


From f20786ff4da51e56b1956acf30be2552be266746 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Jul 2007 01:48:56 -0700
Subject: lockstat: core infrastructure

Introduce the core lock statistics code.

Lock statistics provides lock wait-time and hold-time (as well as the count
of corresponding contention and acquisitions events). Also, the first few
call-sites that encounter contention are tracked.

Lock wait-time is the time spent waiting on the lock. This provides insight
into the locking scheme, that is, a heavily contended lock is indicative of
a too coarse locking scheme.

Lock hold-time is the duration the lock was held, this provides a reference for
the wait-time numbers, so they can be put into perspective.

  1)
    lock
  2)
    ... do stuff ..
    unlock
  3)

The time between 1 and 2 is the wait-time. The time between 2 and 3 is the
hold-time.

The lockdep held-lock tracking code is reused, because it already collects locks
into meaningful groups (classes), and because it is an existing infrastructure
for lock instrumentation.

Currently lockdep tracks lock acquisition with two hooks:

  lock()
    lock_acquire()
    _lock()

 ... code protected by lock ...

  unlock()
    lock_release()
    _unlock()

We need to extend this with two more hooks, in order to measure contention.

  lock_contended() - used to measure contention events
  lock_acquired()  - completion of the contention

These are then placed the following way:

  lock()
    lock_acquire()
    if (!_try_lock())
      lock_contended()
      _lock()
      lock_acquired()

 ... do locked stuff ...

  unlock()
    lock_release()
    _unlock()

(Note: the try_lock() 'trick' is used to avoid instrumenting all platform
       dependent lock primitive implementations.)

It is also possible to toggle the two lockdep features at runtime using:

  /proc/sys/kernel/prove_locking
  /proc/sys/kernel/lock_stat

(esp. turning off the O(n^2) prove_locking functionality can help)

[akpm@linux-foundation.org: build fixes]
[akpm@linux-foundation.org: nuke unneeded ifdefs]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 14c937d345cb..8f946f614f8e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -9,6 +9,7 @@
 #define __LINUX_LOCKDEP_H
 
 struct task_struct;
+struct lockdep_map;
 
 #ifdef CONFIG_LOCKDEP
 
@@ -114,8 +115,32 @@ struct lock_class {
 
 	const char			*name;
 	int				name_version;
+
+#ifdef CONFIG_LOCK_STAT
+	unsigned long			contention_point[4];
+#endif
+};
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+	s64				min;
+	s64				max;
+	s64				total;
+	unsigned long			nr;
 };
 
+struct lock_class_stats {
+	unsigned long			contention_point[4];
+	struct lock_time		read_waittime;
+	struct lock_time		write_waittime;
+	struct lock_time		read_holdtime;
+	struct lock_time		write_holdtime;
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
 /*
  * Map the lock object (the lock instance) to the lock-class object.
  * This is embedded into specific lock instances:
@@ -165,6 +190,10 @@ struct held_lock {
 	unsigned long			acquire_ip;
 	struct lockdep_map		*instance;
 
+#ifdef CONFIG_LOCK_STAT
+	u64 				waittime_stamp;
+	u64				holdtime_stamp;
+#endif
 	/*
 	 * The lock-stack is unified in that the lock chains of interrupt
 	 * contexts nest ontop of process context chains, but we 'separate'
@@ -281,6 +310,30 @@ struct lock_class_key { };
 
 #endif /* !LOCKDEP */
 
+#ifdef CONFIG_LOCK_STAT
+
+extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
+extern void lock_acquired(struct lockdep_map *lock);
+
+#define LOCK_CONTENDED(_lock, try, lock)			\
+do {								\
+	if (!try(_lock)) {					\
+		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
+		lock(_lock);					\
+		lock_acquired(&(_lock)->dep_map);		\
+	}							\
+} while (0)
+
+#else /* CONFIG_LOCK_STAT */
+
+#define lock_contended(lockdep_map, ip) do {} while (0)
+#define lock_acquired(lockdep_map) do {} while (0)
+
+#define LOCK_CONTENDED(_lock, try, lock) \
+	lock(_lock)
+
+#endif /* CONFIG_LOCK_STAT */
+
 #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
 extern void early_init_irq_lock_class(void);
 #else
-- 
cgit v1.2.3


From 4b32d0a4e9ec07808a5c406a416c6576c986b047 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Jul 2007 01:48:59 -0700
Subject: lockdep: various fixes

 - update the copyright notices
 - use the default hash function
 - fix a thinko in a BUILD_BUG_ON
 - add a WARN_ON to spot inconsistent naming
 - fix a termination issue in /proc/lock_stat

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8f946f614f8e..3d3386b88b6a 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -1,7 +1,8 @@
 /*
  * Runtime locking correctness validator
  *
- *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
  * see Documentation/lockdep-design.txt for more details.
  */
-- 
cgit v1.2.3


From 96645678cd726e87ce42a0664de71e047e32bca4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Jul 2007 01:49:00 -0700
Subject: lockstat: measure lock bouncing

    __acquire
        |
       lock _____
        |        \
        |    __contended
        |         |
        |        wait
        | _______/
        |/
        |
   __acquired
        |
   __release
        |
     unlock

We measure acquisition and contention bouncing.

This is done by recording a cpu stamp in each lock instance.

Contention bouncing requires the cpu stamp to be set on acquisition. Hence we
move __acquired into the generic path.

__acquired is then used to measure acquisition bouncing by comparing the
current cpu with the old stamp before replacing it.

__contended is used to measure contention bouncing (only useful for preemptable
locks)

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 3d3386b88b6a..0e843bf65877 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -130,12 +130,24 @@ struct lock_time {
 	unsigned long			nr;
 };
 
+enum bounce_type {
+	bounce_acquired_write,
+	bounce_acquired_read,
+	bounce_contended_write,
+	bounce_contended_read,
+	nr_bounce_types,
+
+	bounce_acquired = bounce_acquired_write,
+	bounce_contended = bounce_contended_write,
+};
+
 struct lock_class_stats {
 	unsigned long			contention_point[4];
 	struct lock_time		read_waittime;
 	struct lock_time		write_waittime;
 	struct lock_time		read_holdtime;
 	struct lock_time		write_holdtime;
+	unsigned long			bounces[nr_bounce_types];
 };
 
 struct lock_class_stats lock_stats(struct lock_class *class);
@@ -150,6 +162,9 @@ struct lockdep_map {
 	struct lock_class_key		*key;
 	struct lock_class		*class_cache;
 	const char			*name;
+#ifdef CONFIG_LOCK_STAT
+	int				cpu;
+#endif
 };
 
 /*
@@ -321,8 +336,8 @@ do {								\
 	if (!try(_lock)) {					\
 		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
 		lock(_lock);					\
-		lock_acquired(&(_lock)->dep_map);		\
 	}							\
+	lock_acquired(&(_lock)->dep_map);			\
 } while (0)
 
 #else /* CONFIG_LOCK_STAT */
-- 
cgit v1.2.3


From 3b5ad0797c0e4049001f961a8b58f1d0ce532072 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 19 Jul 2007 01:49:02 -0700
Subject: stacktrace: fix header file for !CONFIG_STACKTRACE

The print_stack_trace macro in stacktrace.h has a wrong number of
arguments, fix it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stacktrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 1d2b084c0185..e7fa657d0c49 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -13,7 +13,7 @@ extern void save_stack_trace(struct stack_trace *trace);
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
 #else
 # define save_stack_trace(trace)			do { } while (0)
-# define print_stack_trace(trace)			do { } while (0)
+# define print_stack_trace(trace, spaces)		do { } while (0)
 #endif
 
 #endif
-- 
cgit v1.2.3


From d688abf50bd5a30d2c44dea2a72dd59052cd3cce Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 19 Jul 2007 01:49:17 -0700
Subject: move page writeback accounting out of macros

page-writeback accounting is presently performed in the page-flags macros.
This is inconsistent and a bit ugly and makes it awkward to implement
per-backing_dev under-writeback page accounting.

So move this accounting down to the callsite(s).

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 38 ++++++++------------------------------
 1 file changed, 8 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a454176c3e30..209d3a47f50f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -189,37 +189,15 @@ static inline void SetPageUptodate(struct page *page)
 #define __SetPagePrivate(page)  __set_bit(PG_private, &(page)->flags)
 #define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags)
 
+/*
+ * Only test-and-set exist for PG_writeback.  The unconditional operators are
+ * risky: they bypass page accounting.
+ */
 #define PageWriteback(page)	test_bit(PG_writeback, &(page)->flags)
-#define SetPageWriteback(page)						\
-	do {								\
-		if (!test_and_set_bit(PG_writeback,			\
-				&(page)->flags))			\
-			inc_zone_page_state(page, NR_WRITEBACK);	\
-	} while (0)
-#define TestSetPageWriteback(page)					\
-	({								\
-		int ret;						\
-		ret = test_and_set_bit(PG_writeback,			\
-					&(page)->flags);		\
-		if (!ret)						\
-			inc_zone_page_state(page, NR_WRITEBACK);	\
-		ret;							\
-	})
-#define ClearPageWriteback(page)					\
-	do {								\
-		if (test_and_clear_bit(PG_writeback,			\
-				&(page)->flags))			\
-			dec_zone_page_state(page, NR_WRITEBACK);	\
-	} while (0)
-#define TestClearPageWriteback(page)					\
-	({								\
-		int ret;						\
-		ret = test_and_clear_bit(PG_writeback,			\
-				&(page)->flags);			\
-		if (ret)						\
-			dec_zone_page_state(page, NR_WRITEBACK);	\
-		ret;							\
-	})
+#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback,	\
+							&(page)->flags)
+#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,	\
+							&(page)->flags)
 
 #define PageBuddy(page)		test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)	__set_bit(PG_buddy, &(page)->flags)
-- 
cgit v1.2.3


From e22841c637dc8b308b40f59d64a5b6683d458ab7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 19 Jul 2007 01:49:20 -0700
Subject: knfsd: move EX_RDONLY out of header

EX_RDONLY is only called in one place; just put it there.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 78feb7beff75..fb4e93016666 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -116,18 +116,6 @@ struct svc_expkey {
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
 
-static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp)
-{
-	struct exp_flavor_info *f;
-	struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
-
-	for (f = exp->ex_flavors; f < end; f++) {
-		if (f->pseudoflavor == rqstp->rq_flavor)
-			return f->flags & NFSEXP_READONLY;
-	}
-	return exp->ex_flags & NFSEXP_READONLY;
-}
-
 __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 
 /*
-- 
cgit v1.2.3


From c7d51402d2a64c5b96531f9900bb368020ebbbbb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 19 Jul 2007 01:49:20 -0700
Subject: knfsd: clean up EX_RDONLY

Share a little common code, reverse the arguments for consistency, drop the
unnecessary "inline", and lowercase the name.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nfsd/export.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index fb4e93016666..5cd192469096 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -116,6 +116,7 @@ struct svc_expkey {
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
 
+int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
 __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 
 /*
-- 
cgit v1.2.3


From 07ad157f6e5d228be78acd5cea0291e5d0360398 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 19 Jul 2007 01:49:22 -0700
Subject: lguest: the guest code

lguest is a simple hypervisor for Linux on Linux.  Unlike kvm it doesn't need
VT/SVM hardware.  Unlike Xen it's simply "modprobe and go".  Unlike both, it's
5000 lines and self-contained.

Performance is ok, but not great (-30% on kernel compile).  But given its
hackability, I expect this to improve, along with the paravirt_ops code which
it supplies a complete example for.  There's also a 64-bit version being
worked on and other craziness.

But most of all, lguest is awesome fun!  Too much of the kernel is a big ball
of hair.  lguest is simple enough to dive into and hack, plus has some warts
which scream "fork me!".

This patch:

This is the code and headers required to make an i386 kernel an lguest guest.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lguest.h     | 85 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/lguest_bus.h | 48 ++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 include/linux/lguest.h
 create mode 100644 include/linux/lguest_bus.h

(limited to 'include/linux')

diff --git a/include/linux/lguest.h b/include/linux/lguest.h
new file mode 100644
index 000000000000..f30c04fc22b7
--- /dev/null
+++ b/include/linux/lguest.h
@@ -0,0 +1,85 @@
+/* Things the lguest guest needs to know.  Note: like all lguest interfaces,
+ * this is subject to wild and random change between versions. */
+#ifndef _ASM_LGUEST_H
+#define _ASM_LGUEST_H
+
+/* These are randomly chosen numbers which indicate we're an lguest at boot */
+#define LGUEST_MAGIC_EBP 0x4C687970
+#define LGUEST_MAGIC_EDI 0x652D4D65
+#define LGUEST_MAGIC_ESI 0xFFFFFFFF
+
+#ifndef __ASSEMBLY__
+#include <asm/irq.h>
+
+#define LHCALL_FLUSH_ASYNC	0
+#define LHCALL_LGUEST_INIT	1
+#define LHCALL_CRASH		2
+#define LHCALL_LOAD_GDT		3
+#define LHCALL_NEW_PGTABLE	4
+#define LHCALL_FLUSH_TLB	5
+#define LHCALL_LOAD_IDT_ENTRY	6
+#define LHCALL_SET_STACK	7
+#define LHCALL_TS		8
+#define LHCALL_TIMER_READ	9
+#define LHCALL_HALT		10
+#define LHCALL_GET_WALLCLOCK	11
+#define LHCALL_BIND_DMA		12
+#define LHCALL_SEND_DMA		13
+#define LHCALL_SET_PTE		14
+#define LHCALL_SET_PMD		15
+#define LHCALL_LOAD_TLS		16
+
+#define LGUEST_TRAP_ENTRY 0x1F
+
+static inline unsigned long
+hcall(unsigned long call,
+      unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
+		     : "=a"(call)
+		     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
+		     : "memory");
+	return call;
+}
+
+void async_hcall(unsigned long call,
+		 unsigned long arg1, unsigned long arg2, unsigned long arg3);
+
+/* Can't use our min() macro here: needs to be a constant */
+#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
+
+#define LHCALL_RING_SIZE 64
+struct hcall_ring
+{
+	u32 eax, edx, ebx, ecx;
+};
+
+/* All the good stuff happens here: guest registers it with LGUEST_INIT */
+struct lguest_data
+{
+/* Fields which change during running: */
+	/* 512 == enabled (same as eflags) */
+	unsigned int irq_enabled;
+	/* Interrupts blocked by guest. */
+	DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);
+
+	/* Virtual address of page fault. */
+	unsigned long cr2;
+
+	/* Async hypercall ring.  0xFF == done, 0 == pending. */
+	u8 hcall_status[LHCALL_RING_SIZE];
+	struct hcall_ring hcalls[LHCALL_RING_SIZE];
+
+/* Fields initialized by the hypervisor at boot: */
+	/* Memory not to try to access */
+	unsigned long reserve_mem;
+	/* ID of this guest (used by network driver to set ethernet address) */
+	u16 guestid;
+
+/* Fields initialized by the guest at boot: */
+	/* Instruction range to suppress interrupts even if enabled */
+	unsigned long noirq_start, noirq_end;
+};
+extern struct lguest_data lguest_data;
+#endif /* __ASSEMBLY__ */
+#endif	/* _ASM_LGUEST_H */
diff --git a/include/linux/lguest_bus.h b/include/linux/lguest_bus.h
new file mode 100644
index 000000000000..c9b4e05fee49
--- /dev/null
+++ b/include/linux/lguest_bus.h
@@ -0,0 +1,48 @@
+#ifndef _ASM_LGUEST_DEVICE_H
+#define _ASM_LGUEST_DEVICE_H
+/* Everything you need to know about lguest devices. */
+#include <linux/device.h>
+#include <linux/lguest.h>
+#include <linux/lguest_launcher.h>
+
+struct lguest_device {
+	/* Unique busid, and index into lguest_page->devices[] */
+	unsigned int index;
+
+	struct device dev;
+
+	/* Driver can hang data off here. */
+	void *private;
+};
+
+/* By convention, each device can use irq index+1 if it wants to. */
+static inline int lgdev_irq(const struct lguest_device *dev)
+{
+	return dev->index + 1;
+}
+
+/* dma args must not be vmalloced! */
+void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
+int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
+		    unsigned int num, u8 irq);
+void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas);
+
+/* Map the virtual device space */
+void *lguest_map(unsigned long phys_addr, unsigned long pages);
+void lguest_unmap(void *);
+
+struct lguest_driver {
+	const char *name;
+	struct module *owner;
+	u16 device_type;
+	int (*probe)(struct lguest_device *dev);
+	void (*remove)(struct lguest_device *dev);
+
+	struct device_driver drv;
+};
+
+extern int register_lguest_driver(struct lguest_driver *drv);
+extern void unregister_lguest_driver(struct lguest_driver *drv);
+
+extern struct lguest_device_desc *lguest_devices; /* Just past max_pfn */
+#endif /* _ASM_LGUEST_DEVICE_H */
-- 
cgit v1.2.3


From d7e28ffe6c74416b54345d6004fd0964c115b12c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 19 Jul 2007 01:49:23 -0700
Subject: lguest: the host code

This is the code for the "lg.ko" module, which allows lguest guests to
be launched.

[akpm@linux-foundation.org: update for futex-new-private-futexes]
[akpm@linux-foundation.org: build fix]
[jmorris@namei.org: lguest: use hrtimers]
[akpm@linux-foundation.org: x86_64 build fix]
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lguest.h          | 12 +++----
 include/linux/lguest_launcher.h | 73 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/lguest_launcher.h

(limited to 'include/linux')

diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index f30c04fc22b7..500aace21ca7 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -3,11 +3,6 @@
 #ifndef _ASM_LGUEST_H
 #define _ASM_LGUEST_H
 
-/* These are randomly chosen numbers which indicate we're an lguest at boot */
-#define LGUEST_MAGIC_EBP 0x4C687970
-#define LGUEST_MAGIC_EDI 0x652D4D65
-#define LGUEST_MAGIC_ESI 0xFFFFFFFF
-
 #ifndef __ASSEMBLY__
 #include <asm/irq.h>
 
@@ -20,7 +15,7 @@
 #define LHCALL_LOAD_IDT_ENTRY	6
 #define LHCALL_SET_STACK	7
 #define LHCALL_TS		8
-#define LHCALL_TIMER_READ	9
+#define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
 #define LHCALL_GET_WALLCLOCK	11
 #define LHCALL_BIND_DMA		12
@@ -29,6 +24,9 @@
 #define LHCALL_SET_PMD		15
 #define LHCALL_LOAD_TLS		16
 
+#define LG_CLOCK_MIN_DELTA	100UL
+#define LG_CLOCK_MAX_DELTA	ULONG_MAX
+
 #define LGUEST_TRAP_ENTRY 0x1F
 
 static inline unsigned long
@@ -75,6 +73,8 @@ struct lguest_data
 	unsigned long reserve_mem;
 	/* ID of this guest (used by network driver to set ethernet address) */
 	u16 guestid;
+	/* KHz for the TSC clock. */
+	u32 tsc_khz;
 
 /* Fields initialized by the guest at boot: */
 	/* Instruction range to suppress interrupts even if enabled */
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
new file mode 100644
index 000000000000..0ba414a40c80
--- /dev/null
+++ b/include/linux/lguest_launcher.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_LGUEST_USER
+#define _ASM_LGUEST_USER
+/* Everything the "lguest" userspace program needs to know. */
+/* They can register up to 32 arrays of lguest_dma. */
+#define LGUEST_MAX_DMA		32
+/* At most we can dma 16 lguest_dma in one op. */
+#define LGUEST_MAX_DMA_SECTIONS	16
+
+/* How many devices?  Assume each one wants up to two dma arrays per device. */
+#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2)
+
+struct lguest_dma
+{
+	/* 0 if free to be used, filled by hypervisor. */
+ 	u32 used_len;
+	unsigned long addr[LGUEST_MAX_DMA_SECTIONS];
+	u16 len[LGUEST_MAX_DMA_SECTIONS];
+};
+
+struct lguest_block_page
+{
+	/* 0 is a read, 1 is a write. */
+	int type;
+	u32 sector; 	/* Offset in device = sector * 512. */
+	u32 bytes;	/* Length expected to be read/written in bytes */
+	/* 0 = pending, 1 = done, 2 = done, error */
+	int result;
+	u32 num_sectors; /* Disk length = num_sectors * 512 */
+};
+
+/* There is a shared page of these. */
+struct lguest_net
+{
+	/* Simply the mac address (with multicast bit meaning promisc). */
+	unsigned char mac[6];
+};
+
+/* Where the Host expects the Guest to SEND_DMA console output to. */
+#define LGUEST_CONSOLE_DMA_KEY 0
+
+/* We have a page of these descriptors in the lguest_device page. */
+struct lguest_device_desc {
+	u16 type;
+#define LGUEST_DEVICE_T_CONSOLE	1
+#define LGUEST_DEVICE_T_NET	2
+#define LGUEST_DEVICE_T_BLOCK	3
+
+	u16 features;
+#define LGUEST_NET_F_NOCSUM		0x4000 /* Don't bother checksumming */
+#define LGUEST_DEVICE_F_RANDOMNESS	0x8000 /* IRQ is fairly random */
+
+	u16 status;
+/* 256 and above are device specific. */
+#define LGUEST_DEVICE_S_ACKNOWLEDGE	1 /* We have seen device. */
+#define LGUEST_DEVICE_S_DRIVER		2 /* We have found a driver */
+#define LGUEST_DEVICE_S_DRIVER_OK	4 /* Driver says OK! */
+#define LGUEST_DEVICE_S_REMOVED		8 /* Device has gone away. */
+#define LGUEST_DEVICE_S_REMOVED_ACK	16 /* Driver has been told. */
+#define LGUEST_DEVICE_S_FAILED		128 /* Something actually failed */
+
+	u16 num_pages;
+	u32 pfn;
+};
+
+/* Write command first word is a request. */
+enum lguest_req
+{
+	LHREQ_INITIALIZE, /* + pfnlimit, pgdir, start, pageoffset */
+	LHREQ_GETDMA, /* + addr (returns &lguest_dma, irq in ->used_len) */
+	LHREQ_IRQ, /* + irq */
+	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+};
+#endif /* _ASM_LGUEST_USER */
-- 
cgit v1.2.3


From c0d121720220584bba2876b032e58a076b843fa1 Mon Sep 17 00:00:00 2001
From: Dave Jiang <djiang@mvista.com>
Date: Thu, 19 Jul 2007 01:49:46 -0700
Subject: drivers/edac: add new nmi rescan

Provides a way for NMI reported errors on x86 to notify the EDAC
subsystem pending ECC errors by writing to a software state variable.

Here's the reworked patch. I added an EDAC stub to the kernel so we can
have variables that are in the kernel even if EDAC is a module. I also
implemented the idea of using the chip driver to select error detection
mode via module parameter and eliminate the kernel compile option.
Please review/test. Thx!

Also, I only made changes to some of the chipset drivers since I am
unfamiliar with the other ones. We can add similar changes as we go.

Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Douglas Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/edac.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 include/linux/edac.h

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
new file mode 100644
index 000000000000..c8b92d79f884
--- /dev/null
+++ b/include/linux/edac.h
@@ -0,0 +1,29 @@
+/*
+ * Generic EDAC defs
+ *
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+#ifndef _LINUX_EDAC_H_
+#define _LINUX_EDAC_H_
+
+#include <asm/atomic.h>
+
+#define EDAC_OPSTATE_INVAL	-1
+#define EDAC_OPSTATE_POLL	0
+#define EDAC_OPSTATE_NMI	1
+#define EDAC_OPSTATE_INT	2
+
+extern int edac_op_state;
+extern atomic_t edac_handlers;
+extern atomic_t edac_err_assert;
+
+extern int edac_handler_set(void);
+extern void edac_atomic_assert_error(void);
+
+#endif
-- 
cgit v1.2.3


From 535c6a53035d8911f6b90455550c5fde0da7b866 Mon Sep 17 00:00:00 2001
From: Jason Uhlenkott <juhlenko@akamai.com>
Date: Thu, 19 Jul 2007 01:49:48 -0700
Subject: drivers/edac: new Intel 30x0 MC driver

Here's a driver for the Intel 3000 and 3010 memory controllers,
relative to today's Sourceforge code drop.  This has only had light
testing (I've yet to actually see it handle a memory error) but it
detects my hardware correctly.

Signed-off-by: Jason Uhlenkott <juhlenko@akamai.com>
Signed-off-by: Douglas Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2c7add169539..7ec01b7525b6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2209,6 +2209,7 @@
 #define PCI_DEVICE_ID_INTEL_82915GM_IG	0x2592
 #define PCI_DEVICE_ID_INTEL_82945G_HB	0x2770
 #define PCI_DEVICE_ID_INTEL_82945G_IG	0x2772
+#define PCI_DEVICE_ID_INTEL_3000_HB	0x2778
 #define PCI_DEVICE_ID_INTEL_82945GM_HB	0x27A0
 #define PCI_DEVICE_ID_INTEL_82945GM_IG	0x27A2
 #define PCI_DEVICE_ID_INTEL_ICH6_0	0x2640
-- 
cgit v1.2.3


From 66ee2f940ac8ab25f0c43a1e717d25dc46bfe74d Mon Sep 17 00:00:00 2001
From: Dave Jiang <djiang@mvista.com>
Date: Thu, 19 Jul 2007 01:49:54 -0700
Subject: drivers/edac: mod assert_error check

Change error check and clear variable from an atomic to an int

Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Douglas Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/edac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index c8b92d79f884..eab451e69a91 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -20,8 +20,8 @@
 #define EDAC_OPSTATE_INT	2
 
 extern int edac_op_state;
+extern int edac_err_assert;
 extern atomic_t edac_handlers;
-extern atomic_t edac_err_assert;
 
 extern int edac_handler_set(void);
 extern void edac_atomic_assert_error(void);
-- 
cgit v1.2.3


From 53078ca84b1c01f36c306d1f52e2f88c7bb2f9e4 Mon Sep 17 00:00:00 2001
From: Douglas Thompson <dougthompson@xmission.com>
Date: Thu, 19 Jul 2007 01:50:17 -0700
Subject: include/linux/pci_ids.h: add amd northbridge defines

pci_ids.h needs two of the AMD NB device IDs, namely the Address Map and
the Memory Controller devices.

This patch adds those to the pci_ids.h include file.

Signed-off-by:	Douglas Thompson <dougthompson@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7ec01b7525b6..b15c6498fe67 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -495,6 +495,8 @@
 
 #define PCI_VENDOR_ID_AMD		0x1022
 #define PCI_DEVICE_ID_AMD_K8_NB		0x1100
+#define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP	0x1101
+#define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL	0x1102
 #define PCI_DEVICE_ID_AMD_K8_NB_MISC	0x1103
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
-- 
cgit v1.2.3


From e24b8cb4fa2bb779bdf48656152366b6f52f748f Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 8 Jul 2007 14:26:37 +0200
Subject: i2c: Delete the i2c-isa pseudo bus driver

There are no users of i2c-isa left, so we can finally get rid of it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c-isa.h | 36 ------------------------------------
 include/linux/i2c.h     |  1 -
 2 files changed, 37 deletions(-)
 delete mode 100644 include/linux/i2c-isa.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h
deleted file mode 100644
index 67e3598c4cec..000000000000
--- a/include/linux/i2c-isa.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * i2c-isa.h - definitions for the i2c-isa pseudo-i2c-adapter interface
- *
- * Copyright (C) 2005 Jean Delvare <khali@linux-fr.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _LINUX_I2C_ISA_H
-#define _LINUX_I2C_ISA_H
-
-#include <linux/i2c.h>
-
-extern int i2c_isa_add_driver(struct i2c_driver *driver);
-extern int i2c_isa_del_driver(struct i2c_driver *driver);
-
-/* Detect whether we are on the isa bus. This is only useful to hybrid
-   (i2c+isa) drivers. */
-#define i2c_is_isa_adapter(adapptr) \
-        ((adapptr)->id == I2C_HW_ISA)
-#define i2c_is_isa_client(clientptr) \
-        i2c_is_isa_adapter((clientptr)->adapter)
-
-#endif /* _LINUX_I2C_ISA_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cae7d618030c..47f40376a3c7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -362,7 +362,6 @@ struct i2c_client_address_data {
 
 /* The numbers to use to set I2C bus address */
 #define ANY_I2C_BUS		0xffff
-#define ANY_I2C_ISA_BUS		9191
 
 
 /* ----- functions exported by i2c.o */
-- 
cgit v1.2.3


From be879c4e249a8875d7129f3b0c1bb62584dafbd8 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 11 Jul 2007 18:39:02 -0400
Subject: SUNRPC: move bkl locking and xdr proc invocation into a common helper

Since every invocation of xdr encode or decode functions takes the BKL now,
there's a lot of redundant lock_kernel/unlock_kernel pairs that we can pull
out into a common function.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 9e340fa23c06..c6b53d181bfa 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -12,6 +12,7 @@
 #include <linux/uio.h>
 #include <asm/byteorder.h>
 #include <linux/scatterlist.h>
+#include <linux/smp_lock.h>
 
 /*
  * Buffer adjustment
@@ -35,6 +36,21 @@ struct xdr_netobj {
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
 
+/*
+ * We're still requiring the BKL in the xdr code until it's been
+ * more carefully audited, at which point this wrapper will become
+ * unnecessary.
+ */
+static inline int rpc_call_xdrproc(kxdrproc_t xdrproc, void *rqstp, __be32 *data, void *obj)
+{
+	int ret;
+
+	lock_kernel();
+	ret = xdrproc(rqstp, data, obj);
+	unlock_kernel();
+	return ret;
+}
+
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
-- 
cgit v1.2.3


From 4fdc17b2a7f4d9db5b08e0f963d0027f714e4104 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:57 -0400
Subject: NFS: Introduce struct nfs_removeargs+nfs_removeres

We need a common structure for setting up an unlink() rpc call in order to
fix the asynchronous unlink code.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 38d77681cf27..7babcb16300b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -277,6 +277,21 @@ struct nfs_writeres {
 	const struct nfs_server *server;
 };
 
+/*
+ * Common arguments to the unlink call
+ */
+struct nfs_removeargs {
+	const struct nfs_fh	*fh;
+	struct qstr		name;
+	const u32 *		bitmask;
+};
+
+struct nfs_removeres {
+	const struct nfs_server *server;
+	struct nfs4_change_info	cinfo;
+	struct nfs_fattr	dir_attr;
+};
+
 /*
  * Argument struct for decode_entry function
  */
@@ -631,18 +646,6 @@ struct nfs4_readlink {
 	struct page **			pages;   /* zero-copy data */
 };
 
-struct nfs4_remove_arg {
-	const struct nfs_fh *		fh;
-	const struct qstr *		name;
-	const u32 *			bitmask;
-};
-
-struct nfs4_remove_res {
-	const struct nfs_server *	server;
-	struct nfs4_change_info		cinfo;
-	struct nfs_fattr *		dir_attr;
-};
-
 struct nfs4_rename_arg {
 	const struct nfs_fh *		old_dir;
 	const struct nfs_fh *		new_dir;
-- 
cgit v1.2.3


From e4eff1a622edd6ab7b73acd5d8763aa2fa3fee49 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:58 -0400
Subject: SUNRPC: Clean up the sillyrename code

Fix a couple of bugs:
 - Don't rely on the parent dentry still being valid when the call completes.
   Fixes a race with shrink_dcache_for_umount_subtree()

 - Don't remove the file if the filehandle has been labelled as stale.

Fix a couple of inefficiencies
 - Remove the global list of sillyrenamed files. Instead we can cache the
   sillyrename information in the dentry->d_fsdata
 - Move common code from unlink_setup/unlink_done into fs/nfs/unlink.c

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h  | 4 ++--
 include/linux/nfs_xdr.h | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c098ae194f79..9ba4aec37c50 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -407,8 +407,8 @@ extern void nfs_release_automount_timer(void);
 /*
  * linux/fs/nfs/unlink.c
  */
-extern int  nfs_async_unlink(struct dentry *);
-extern void nfs_complete_unlink(struct dentry *);
+extern int  nfs_async_unlink(struct inode *dir, struct dentry *dentry);
+extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
 
 /*
  * linux/fs/nfs/write.c
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7babcb16300b..cf74a4db84a5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -791,9 +791,8 @@ struct nfs_rpc_ops {
 	int	(*create)  (struct inode *, struct dentry *,
 			    struct iattr *, int, struct nameidata *);
 	int	(*remove)  (struct inode *, struct qstr *);
-	int	(*unlink_setup)  (struct rpc_message *,
-			    struct dentry *, struct qstr *);
-	int	(*unlink_done) (struct dentry *, struct rpc_task *);
+	void	(*unlink_setup)  (struct rpc_message *, struct inode *dir);
+	int	(*unlink_done) (struct rpc_task *, struct inode *);
 	int	(*rename)  (struct inode *, struct qstr *,
 			    struct inode *, struct qstr *);
 	int	(*link)    (struct inode *, struct inode *, struct qstr *);
-- 
cgit v1.2.3


From e436d80085133858bf2613a630365e8a0459fd58 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 19 Jul 2007 21:28:35 +0200
Subject: [PATCH] sched: implement cpu_clock(cpu) high-speed time source

Implement the cpu_clock(cpu) interface for kernel-internal use:
high-speed (but slightly incorrect) per-cpu clock constructed from
sched_clock().

This API, unused at the moment, will be used in the future by blktrace,
by the softlockup-watchdog, by printk and by lockstat.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94f624aef017..33b9b4841ee7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1348,6 +1348,13 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 
 extern unsigned long long sched_clock(void);
+
+/*
+ * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
+ * clock constructed from sched_clock():
+ */
+extern unsigned long long cpu_clock(int cpu);
+
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 
-- 
cgit v1.2.3


From 626ac545c12e5f9bffe93086d1d03d26c99987ea Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Tue, 17 Jul 2007 15:28:17 -0400
Subject: user namespace: fix copy_user_ns return value

When CONFIG_USER_NS=n and a user tries to unshare some namespace other
than the user namespace, the dummy copy_user_ns() returns NULL rather than
old_ns.

This value then gets assigned to task->nsproxy->user_ns, so that a
subsequent setuid, which uses task->nsproxy->user_ns, causes a NULL
pointer deref.

Fix this by returning old_ns.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/user_namespace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index bb320573bb9e..1101b0ce878f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -49,7 +49,7 @@ static inline struct user_namespace *copy_user_ns(int flags,
 	if (flags & CLONE_NEWUSER)
 		return ERR_PTR(-EINVAL);
 
-	return NULL;
+	return old_ns;
 }
 
 static inline void put_user_ns(struct user_namespace *ns)
-- 
cgit v1.2.3


From 342cdb6d4739cee430efc3eafcacd1605db66036 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:55 +0200
Subject: ide: make ide_get_best_pio_mode() print info if overriding PIO mode

* Print info about overriding PIO mode in ide_get_best_pio_mode().

* Remove info about overriding PIO mode from cmd64{0,x} host drivers.

* Remove no longer needed ide_pio_data_t.overridden field.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 19ab25804056..83a117d673c7 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1375,7 +1375,6 @@ typedef struct ide_pio_timings_s {
 typedef struct ide_pio_data_s {
 	u8 pio_mode;
 	u8 use_iordy;
-	u8 overridden;
 	unsigned int cycle_time;
 } ide_pio_data_t;
 
-- 
cgit v1.2.3


From 2229833c1365346b64357a9263fa724f74f5e376 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:55 +0200
Subject: ide: add ide_dev_has_iordy() helper (take 4)

* Add ide_dev_has_iordy() helper and use it in the sl82c105 host driver.

* Remove no longer needed ide_pio_data_t.use_iordy field.

v2/v3:
* Fix issues noticed by Sergei:
  - correct patch description
  - fix comment in ide_get_best_pio_mode()

v4:
* Fix "ata_" prefix (Noticed by Jeff).

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 83a117d673c7..349c22a1fbc5 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1363,6 +1363,11 @@ extern void ide_toggle_bounce(ide_drive_t *drive, int on);
 extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate);
 int ide_use_fast_pio(ide_drive_t *);
 
+static inline int ide_dev_has_iordy(struct hd_driveid *id)
+{
+	return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0;
+}
+
 u8 ide_dump_status(ide_drive_t *, const char *, u8);
 
 typedef struct ide_pio_timings_s {
@@ -1374,7 +1379,6 @@ typedef struct ide_pio_timings_s {
 
 typedef struct ide_pio_data_s {
 	u8 pio_mode;
-	u8 use_iordy;
 	unsigned int cycle_time;
 } ide_pio_data_t;
 
-- 
cgit v1.2.3


From a5d8c5c834d3cabf4b7b477c3f6ee923c25026fc Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:55 +0200
Subject: ide: add ide_pci_device_t.host_flags (take 2)

* Rename ide_pci_device_t.flags to ide_pci_device_t.host_flags
  and IDEPCI_FLAG_ISA_PORTS flag to IDE_HFLAG_ISA_PORTS.

* Add IDE_HFLAG_SINGLE flag for single channel devices.

* Convert core code and all IDE PCI drivers to use IDE_HFLAG_SINGLE
  and remove no longer needed ide_pci_device_t.channels field.

v2:
* Fix issues noticed by Sergei:
  - correct code alignment in scc_pata.c
  - s/IDE_HFLAG_SINGLE/~IDE_HFLAG_SINGLE/ in serverworks.c

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 349c22a1fbc5..498dc57627fa 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1244,7 +1244,8 @@ typedef struct ide_pci_enablebit_s {
 
 enum {
 	/* Uses ISA control ports not PCI ones. */
-	IDEPCI_FLAG_ISA_PORTS		= (1 << 0),
+	IDE_HFLAG_ISA_PORTS		= (1 << 0),
+	IDE_HFLAG_SINGLE		= (1 << 1),
 };
 
 typedef struct ide_pci_device_s {
@@ -1256,13 +1257,12 @@ typedef struct ide_pci_device_s {
 	void                    (*init_hwif)(ide_hwif_t *);
 	void			(*init_dma)(ide_hwif_t *, unsigned long);
 	void			(*fixup)(ide_hwif_t *);
-	u8			channels;
 	u8			autodma;
 	ide_pci_enablebit_t	enablebits[2];
 	u8			bootable;
 	unsigned int		extra;
 	struct ide_pci_device_s	*next;
-	u8			flags;
+	u8			host_flags;
 	u8			udma_mask;
 } ide_pci_device_t;
 
-- 
cgit v1.2.3


From 7dd00083b1160b560fa2a0a486799b57baa5d035 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:56 +0200
Subject: ide: add ide_pio_cycle_time() helper (take 2)

* Add ide_pio_cycle_time() helper.

* Use it in ali14xx/ht6560b/qd65xx/cmd64{0,x}/sl82c105 and pmac host drivers
  (previously cycle time given by the device was only used for "pio" == 255).

* Remove no longer needed ide_pio_data_t.cycle_time field.

v2:
* Fix "ata_" prefix (Noticed by Jeff).

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 498dc57627fa..0afa52c14ffa 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1379,9 +1379,9 @@ typedef struct ide_pio_timings_s {
 
 typedef struct ide_pio_data_s {
 	u8 pio_mode;
-	unsigned int cycle_time;
 } ide_pio_data_t;
 
+unsigned int ide_pio_cycle_time(ide_drive_t *, u8);
 extern u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode, ide_pio_data_t *d);
 extern const ide_pio_timings_t ide_pio_timings[6];
 
-- 
cgit v1.2.3


From 2134758d2a5429325cee4d4ce8959af5314eeba1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:58 +0200
Subject: ide: drop "PIO data" argument from ide_get_best_pio_mode()

* Drop no longer needed "PIO data" argument from ide_get_best_pio_mode()
  and convert all users accordingly.

* Remove no longer needed ide_pio_data_t.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 0afa52c14ffa..14a87f619d17 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1377,12 +1377,8 @@ typedef struct ide_pio_timings_s {
 				/* active + recovery (+ setup for some chips) */
 } ide_pio_timings_t;
 
-typedef struct ide_pio_data_s {
-	u8 pio_mode;
-} ide_pio_data_t;
-
 unsigned int ide_pio_cycle_time(ide_drive_t *, u8);
-extern u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode, ide_pio_data_t *d);
+u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8);
 extern const ide_pio_timings_t ide_pio_timings[6];
 
 
-- 
cgit v1.2.3


From 6a824c92db4d606c324272c4eed366fb71672440 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:58 +0200
Subject: ide: remove ide_find_best_pio_mode()

* Add ->host_flags to ide_hwif_t to store ide_pci_device_t.host_flags,
  assign it in setup-pci.c:ide_pci_setup_ports().

* Add IDE_HFLAG_PIO_NO_{BLACKLIST,DOWNGRADE} to ide_pci_device_t.host_flags
  and teach ide_get_best_pio_mode() about them.  Also remove needless
  !drive->id check while at it (drive->id is always present).

* Convert amd74xx, via82cxxx and ide-timing.h to use ide_get_best_pio_mode()
  and then remove no longer needed ide_find_best_pio_mode().

There should be no functionality changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 14a87f619d17..9f72f6e0c954 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -681,6 +681,8 @@ typedef struct hwif_s {
 	u8 straight8;	/* Alan's straight 8 check */
 	u8 bus_state;	/* power state of the IDE bus */
 
+	u8 host_flags;
+
 	u8 atapi_dma;	/* host supports atapi_dma */
 	u8 ultra_mask;
 	u8 mwdma_mask;
@@ -1245,7 +1247,12 @@ typedef struct ide_pci_enablebit_s {
 enum {
 	/* Uses ISA control ports not PCI ones. */
 	IDE_HFLAG_ISA_PORTS		= (1 << 0),
+	/* single port device */
 	IDE_HFLAG_SINGLE		= (1 << 1),
+	/* don't use legacy PIO blacklist */
+	IDE_HFLAG_PIO_NO_BLACKLIST	= (1 << 2),
+	/* don't use conservative PIO "downgrade" */
+	IDE_HFLAG_PIO_NO_DOWNGRADE	= (1 << 3),
 };
 
 typedef struct ide_pci_device_s {
-- 
cgit v1.2.3


From 4099d14322149c7a467e4997b87be4ba8eb78697 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 20 Jul 2007 01:11:59 +0200
Subject: ide: add PIO masks

* Add ATA_PIO[0-6] defines to <linux/ata.h>.

* Add ->pio_mask field to ide_pci_device_t and ide_hwif_t.

* Add PIO masks to host drivers.

<linux/ata.h> change ACK-ed by Jeff Garzik <jeff@garzik.org>.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ata.h | 9 +++++++++
 include/linux/ide.h | 3 +++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index b5a20162af32..23a22df039d8 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -64,6 +64,15 @@ enum {
 	ATA_ID_PROD_LEN		= 40,
 
 	ATA_PCI_CTL_OFS		= 2,
+
+	ATA_PIO0		= (1 << 0),
+	ATA_PIO1		= ATA_PIO0 | (1 << 1),
+	ATA_PIO2		= ATA_PIO1 | (1 << 2),
+	ATA_PIO3		= ATA_PIO2 | (1 << 3),
+	ATA_PIO4		= ATA_PIO3 | (1 << 4),
+	ATA_PIO5		= ATA_PIO4 | (1 << 5),
+	ATA_PIO6		= ATA_PIO5 | (1 << 6),
+
 	ATA_UDMA0		= (1 << 0),
 	ATA_UDMA1		= ATA_UDMA0 | (1 << 1),
 	ATA_UDMA2		= ATA_UDMA1 | (1 << 2),
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9f72f6e0c954..5f5daad8bc54 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -683,6 +683,8 @@ typedef struct hwif_s {
 
 	u8 host_flags;
 
+	u8 pio_mask;
+
 	u8 atapi_dma;	/* host supports atapi_dma */
 	u8 ultra_mask;
 	u8 mwdma_mask;
@@ -1270,6 +1272,7 @@ typedef struct ide_pci_device_s {
 	unsigned int		extra;
 	struct ide_pci_device_s	*next;
 	u8			host_flags;
+	u8			pio_mask;
 	u8			udma_mask;
 } ide_pci_device_t;
 
-- 
cgit v1.2.3


From 20c2df83d25c6a95affe6157a4c9cac4cf5ffaac Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 20 Jul 2007 10:11:58 +0900
Subject: mm: Remove slab destructors from kmem_cache_create().

Slab destructors were no longer supported after Christoph's
c59def9f222d44bb7e2f0a559f2906191a0862d7 change. They've been
BUGs for both slab and slub, and slob never supported them
either.

This rips out support for the dtor pointer from kmem_cache_create()
completely and fixes up every single callsite in the kernel (there were
about 224, not including the slab allocator definitions themselves,
or the documentation references).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/i2o.h  | 3 +--
 include/linux/slab.h | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 333a370a3bdc..9752307d16ba 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -946,8 +946,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name,
 	strcpy(pool->name, name);
 
 	pool->slab =
-	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL,
-			      NULL);
+	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL);
 	if (!pool->slab)
 		goto free_name;
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0e1d0daef6a2..7d0ecc1659f0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -51,7 +51,6 @@ int slab_is_available(void);
 
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
-			void (*)(void *, struct kmem_cache *, unsigned long),
 			void (*)(void *, struct kmem_cache *, unsigned long));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
@@ -70,7 +69,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
  */
 #define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
 		sizeof(struct __struct), __alignof__(struct __struct),\
-		(__flags), NULL, NULL)
+		(__flags), NULL)
 
 /*
  * The largest kmalloc size supported by the slab allocators is
-- 
cgit v1.2.3


From 76c1ce7870fd9b05431da1bbd47fdafcc029a25b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 1 May 2007 16:19:07 +1000
Subject: Split out common parts of prom.h

This creates linux/of.h and includes asm/prom.h from it.

We also include linux/of.h from asm/prom.h while we transition.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 include/linux/of.h

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
new file mode 100644
index 000000000000..47734ffd9745
--- /dev/null
+++ b/include/linux/of.h
@@ -0,0 +1,61 @@
+#ifndef _LINUX_OF_H
+#define _LINUX_OF_H
+/*
+ * Definitions for talking to the Open Firmware PROM on
+ * Power Macintosh and other computers.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
+ * Updates for SPARC64 by David S. Miller
+ * Derived from PowerPC and Sparc prom.h files by Stephen Rothwell, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#include <asm/bitops.h>
+#include <asm/prom.h>
+
+/* flag descriptions */
+#define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
+#define OF_DETACHED	2 /* node has been detached from the device tree */
+
+#define OF_BAD_ADDR	((u64)-1)
+
+extern struct device_node *of_find_node_by_name(struct device_node *from,
+	const char *name);
+#define for_each_node_by_name(dn, name) \
+	for (dn = of_find_node_by_name(NULL, name); dn; \
+	     dn = of_find_node_by_name(dn, name))
+extern struct device_node *of_find_node_by_type(struct device_node *from,
+	const char *type);
+#define for_each_node_by_type(dn, type) \
+	for (dn = of_find_node_by_type(NULL, type); dn; \
+	     dn = of_find_node_by_type(dn, type))
+extern struct device_node *of_find_compatible_node(struct device_node *from,
+	const char *type, const char *compat);
+#define for_each_compatible_node(dn, type, compatible) \
+	for (dn = of_find_compatible_node(NULL, type, compatible); dn; \
+	     dn = of_find_compatible_node(dn, type, compatible))
+extern struct device_node *of_find_node_by_path(const char *path);
+extern struct device_node *of_find_node_by_phandle(phandle handle);
+extern struct device_node *of_get_parent(const struct device_node *node);
+extern struct device_node *of_get_next_child(const struct device_node *node,
+					     struct device_node *prev);
+extern struct property *of_find_property(const struct device_node *np,
+					 const char *name,
+					 int *lenp);
+extern int of_device_is_compatible(const struct device_node *device,
+				   const char *);
+extern const void *of_get_property(const struct device_node *node,
+				const char *name,
+				int *lenp);
+#define get_property(a, b, c)	of_get_property((a), (b), (c))
+extern int of_n_addr_cells(struct device_node *np);
+extern int of_n_size_cells(struct device_node *np);
+
+#endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From f898f8dbcec4848cddb8c5be2d0affd75779ebe2 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 1 May 2007 16:49:51 +1000
Subject: Begin consolidation of of_device.h

This just moves the common stuff from the arch of_device.h files to
linux/of_device.h.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_device.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 include/linux/of_device.h

(limited to 'include/linux')

diff --git a/include/linux/of_device.h b/include/linux/of_device.h
new file mode 100644
index 000000000000..91bf84b9d144
--- /dev/null
+++ b/include/linux/of_device.h
@@ -0,0 +1,26 @@
+#ifndef _LINUX_OF_DEVICE_H
+#define _LINUX_OF_DEVICE_H
+#ifdef __KERNEL__
+
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/mod_devicetable.h>
+
+#include <asm/of_device.h>
+
+#define	to_of_device(d) container_of(d, struct of_device, dev)
+
+extern const struct of_device_id *of_match_node(
+	const struct of_device_id *matches, const struct device_node *node);
+extern const struct of_device_id *of_match_device(
+	const struct of_device_id *matches, const struct of_device *dev);
+
+extern struct of_device *of_dev_get(struct of_device *dev);
+extern void of_dev_put(struct of_device *dev);
+
+extern int of_device_register(struct of_device *ofdev);
+extern void of_device_unregister(struct of_device *ofdev);
+extern void of_release_dev(struct device *dev);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_OF_DEVICE_H */
-- 
cgit v1.2.3


From b41912ca345e6de8ec8469d57cd585881271e2b9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 1 May 2007 16:12:57 +1000
Subject: Create linux/of_platform.h

Move common stuff from asm-powerpc/of_platform.h to here and
move the common bits from asm-sparc*/of_device.h here as well.

Create asm-sparc*/of_platform.h and move appropriate parts of
of_device.h to them.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_platform.h | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 include/linux/of_platform.h

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
new file mode 100644
index 000000000000..c85d0f835783
--- /dev/null
+++ b/include/linux/of_platform.h
@@ -0,0 +1,55 @@
+#ifndef _LINUX_OF_PLATFORM_H
+#define _LINUX_OF_PLATFORM_H
+/*
+ *    Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
+ *			 <benh@kernel.crashing.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pm.h>
+#include <linux/of_device.h>
+
+/*
+ * The of_platform_bus_type is a bus type used by drivers that do not
+ * attach to a macio or similar bus but still use OF probing
+ * mechanism
+ */
+extern struct bus_type of_platform_bus_type;
+
+/*
+ * An of_platform_driver driver is attached to a basic of_device on
+ * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS
+ * busses on sparc).
+ */
+struct of_platform_driver
+{
+	char			*name;
+	struct of_device_id	*match_table;
+	struct module		*owner;
+
+	int	(*probe)(struct of_device* dev,
+			 const struct of_device_id *match);
+	int	(*remove)(struct of_device* dev);
+
+	int	(*suspend)(struct of_device* dev, pm_message_t state);
+	int	(*resume)(struct of_device* dev);
+	int	(*shutdown)(struct of_device* dev);
+
+	struct device_driver	driver;
+};
+#define	to_of_platform_driver(drv) \
+	container_of(drv,struct of_platform_driver, driver)
+
+#include <asm/of_platform.h>
+
+extern struct of_device *of_find_device_by_node(struct device_node *np);
+
+#endif	/* _LINUX_OF_PLATFORM_H */
-- 
cgit v1.2.3


From 3f23de10f283819bcdc0d2282e8b5b14c2e96d3b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 3 May 2007 02:38:57 +1000
Subject: Create drivers/of/platform.c

and populate it with the common parts from PowerPC and Sparc[64].

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_platform.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index c85d0f835783..5fd44e63fb26 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -52,4 +52,6 @@ struct of_platform_driver
 
 extern struct of_device *of_find_device_by_node(struct device_node *np);
 
+extern int of_bus_type_init(struct bus_type *bus, const char *name);
+
 #endif	/* _LINUX_OF_PLATFORM_H */
-- 
cgit v1.2.3


From c2dea2d1fdbce86942dba0a968c523d8b7858bb5 Mon Sep 17 00:00:00 2001
From: Vasily Tarasov <vtaras@openvz.org>
Date: Fri, 20 Jul 2007 10:06:38 +0200
Subject: cfq: async queue allocation per priority

If we have two processes with different ioprio_class, but the same
ioprio_data, their async requests will fall into the same queue. I guess
such behavior is not expected, because it's not right to put real-time
requests and best-effort requests in the same queue.

The attached patch fixes the problem by introducing additional *cfqq
fields on cfqd, pointing to per-(class,priority) async queues.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/ioprio.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 2eaa142cd061..baf29387cab4 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -53,6 +53,14 @@ static inline int task_ioprio(struct task_struct *task)
 	return IOPRIO_NORM;
 }
 
+static inline int task_ioprio_class(struct task_struct *task)
+{
+	if (ioprio_valid(task->ioprio))
+		return IOPRIO_PRIO_CLASS(task->ioprio);
+
+	return IOPRIO_CLASS_BE;
+}
+
 static inline int task_nice_ioprio(struct task_struct *task)
 {
 	return (task_nice(task) + 20) / 5;
-- 
cgit v1.2.3


From 7a05f067c0da139613cbe74583bb7d208a5f87b9 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Date: Mon, 14 May 2007 11:05:09 +0200
Subject: [ALSA] ASoC S3C24xx machine drivers - I2C ID for LM4857

This patch adds I2C ID for the LM4857 audio amp and corrects the spacing
of the WM8731, WM8750 and WM8753 ID's.

Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/linux/i2c-id.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index aa83d4163096..b69014865714 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -115,9 +115,10 @@
 #define I2C_DRIVERID_KS0127	86	/* Samsung ks0127 video decoder */
 #define I2C_DRIVERID_TLV320AIC23B 87	/* TI TLV320AIC23B audio codec  */
 #define I2C_DRIVERID_ISL1208	88	/* Intersil ISL1208 RTC		*/
-#define I2C_DRIVERID_WM8731		89	/* Wolfson WM8731 audio codec */
-#define I2C_DRIVERID_WM8750		90	/* Wolfson WM8750 audio codec */
-#define I2C_DRIVERID_WM8753		91	/* Wolfson WM8753 audio codec */
+#define I2C_DRIVERID_WM8731	89	/* Wolfson WM8731 audio codec */
+#define I2C_DRIVERID_WM8750	90	/* Wolfson WM8750 audio codec */
+#define I2C_DRIVERID_WM8753	91	/* Wolfson WM8753 audio codec */
+#define I2C_DRIVERID_LM4857 	92 	/* LM4857 Audio Amplifier */
 
 #define I2C_DRIVERID_I2CDEV	900
 #define I2C_DRIVERID_ARP        902    /* SMBus ARP Client              */
-- 
cgit v1.2.3


From 9977126c4b65c1396b665f7a0eeb8c7dede336f9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:38 +0900
Subject: libata: add @is_cmd to ata_tf_to_fis()

Add @is_cmd to ata_tf_to_fis().  This controls bit 7 of the second
byte which tells the device whether this H2D FIS is for a command or
not.  This cleans up ahci a bit and will be used by PMP.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 47cd2a1c5544..5d3df6cde272 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -764,7 +764,8 @@ extern unsigned int ata_dev_try_classify(struct ata_port *, unsigned int, u8 *);
  */
 extern void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf);
 extern void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
-extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 *fis, u8 pmp);
+extern void ata_tf_to_fis(const struct ata_taskfile *tf,
+			  u8 pmp, int is_cmd, u8 *fis);
 extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf);
 extern void ata_noop_dev_select (struct ata_port *ap, unsigned int device);
 extern void ata_std_dev_select (struct ata_port *ap, unsigned int device);
-- 
cgit v1.2.3


From b64bbc39f2122a2276578e40144af69ef01decd4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:39 +0900
Subject: libata: improve EH report formatting

Requiring LLDs to format multiple error description messages properly
doesn't work too well.  Help LLDs a bit by making ata_ehi_push_desc()
insert ", " on each invocation.  __ata_ehi_push_desc() is the raw
version without the automatic separator.

While at it, make ehi_desc interface proper functions instead of
macros.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5d3df6cde272..94b37d180680 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -910,16 +910,9 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 /*
  * ata_eh_info helpers
  */
-#define ata_ehi_push_desc(ehi, fmt, args...) do { \
-	(ehi)->desc_len += scnprintf((ehi)->desc + (ehi)->desc_len, \
-				     ATA_EH_DESC_LEN - (ehi)->desc_len, \
-				     fmt , ##args); \
-} while (0)
-
-#define ata_ehi_clear_desc(ehi) do { \
-	(ehi)->desc[0] = '\0'; \
-	(ehi)->desc_len = 0; \
-} while (0)
+extern void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...);
+extern void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...);
+extern void ata_ehi_clear_desc(struct ata_eh_info *ehi);
 
 static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi)
 {
-- 
cgit v1.2.3


From 5335b729064e03319cd2d5219770451dbb1d7f67 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:40 +0900
Subject: libata: implement AC_ERR_NCQ

When an NCQ command fails, all commands in flight are aborted and the
offending one is reported using log page 10h.  Depending on controller
characteristics and LLD implementation, all commands may appear as
having a device error due to shared TF status making it hard to
determine what's actually going on.

This patch adds AC_ERR_NCQ, marks the command reported by log page 10h
with it and prints an extra "<F>" after the error report for the command
to help distinguish the offending command.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 94b37d180680..cb181713d9b5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -323,6 +323,7 @@ enum ata_completion_errors {
 	AC_ERR_INVALID		= (1 << 7), /* invalid argument */
 	AC_ERR_OTHER		= (1 << 8), /* unknown */
 	AC_ERR_NODEV_HINT	= (1 << 9), /* polling device detection hint */
+	AC_ERR_NCQ		= (1 << 10), /* marker for offending NCQ qc */
 };
 
 /* forward declarations */
-- 
cgit v1.2.3


From da3dbb17a0e9a9ec7f5aed95f1fddadb790edc9d Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:40 +0900
Subject: libata: make ->scr_read/write callbacks return error code

Convert ->scr_read/write callbacks to return error code to better
indicate failure.  This will help handling of SCR_NOTIFICATION.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index cb181713d9b5..c732b3e78e28 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -620,9 +620,8 @@ struct ata_port_operations {
 	u8 (*irq_on) (struct ata_port *);
 	u8 (*irq_ack) (struct ata_port *ap, unsigned int chk_drq);
 
-	u32 (*scr_read) (struct ata_port *ap, unsigned int sc_reg);
-	void (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
-			   u32 val);
+	int (*scr_read) (struct ata_port *ap, unsigned int sc_reg, u32 *val);
+	int (*scr_write) (struct ata_port *ap, unsigned int sc_reg, u32 val);
 
 	int (*port_suspend) (struct ata_port *ap, pm_message_t mesg);
 	int (*port_resume) (struct ata_port *ap);
-- 
cgit v1.2.3


From 008a78961ec72990d09d7625ef9499d7317d040d Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:40 +0900
Subject: libata: improve SATA PHY speed down logic

sata_down_spd_limit() first reads the current SPD from SStatus and
limits the speed to the lower one of one below the current limit or one
below the current SPD in SStatus.  SPD may not be accessible or valid
when SPD down is requested making sata_down_spd_limit() fail when it's
most needed.

This patch makes the current SPD cached after each successful reset
and forces GEN I speed (1.5Gbps) if neither of SStatus or the cached
value is valid, so sata_down_spd_limit() is now guaranteed to lower
the speed limit if lower speed is available.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index c732b3e78e28..16ebdf152c75 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -531,6 +531,7 @@ struct ata_port {
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 	unsigned int		hw_sata_spd_limit;
 	unsigned int		sata_spd_limit;	/* SATA PHY speed limit */
+	unsigned int		sata_spd;	/* current SATA PHY speed */
 
 	/* record runtime error info, protected by host lock */
 	struct ata_eh_info	eh_info;
-- 
cgit v1.2.3


From f8f1e1cc0cd4d75c73e9a55a0ede8958e4fa14f1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:40 +0900
Subject: libata: reorganize ata_ehi_hotplugged()

__ata_ehi_hotplugged() now has no users.  Reorganize
ata_ehi_hotplugged() such that a new function ata_ehi_schedule_probe()
deals with scheduling probing.  ata_ehi_hotplugged() calls it and
additionally marks hotplug specific flags.  ata_ehi_schedule_probe()
will be used later.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 16ebdf152c75..74800ad6d81f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -915,16 +915,17 @@ extern void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...);
 extern void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...);
 extern void ata_ehi_clear_desc(struct ata_eh_info *ehi);
 
-static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi)
+static inline void ata_ehi_schedule_probe(struct ata_eh_info *ehi)
 {
-	ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK;
+	ehi->flags |= ATA_EHI_RESUME_LINK;
 	ehi->action |= ATA_EH_SOFTRESET;
 	ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1;
 }
 
 static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi)
 {
-	__ata_ehi_hotplugged(ehi);
+	ata_ehi_schedule_probe(ehi);
+	ehi->flags |= ATA_EHI_HOTPLUGGED;
 	ehi->err_mask |= AC_ERR_ATA_BUS;
 }
 
-- 
cgit v1.2.3


From 5ddf24c5ea9d715dc4f5d5d5dd1c9337d90466dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 16 Jul 2007 14:29:41 +0900
Subject: libata: implement EH fast drain

In most cases, when EH is scheduled, all in-flight commands are
aborted causing EH to kick in immediately.  However, in some cases
(especially with PMP), it's unclear which commands are affected by the
error condition and although aborting all in-flight commands works, it
isn't optimal and may cause unnecessary disruption.  On the other
hand, waiting for in-flight commands to drain themselves can take up
to 30 seconds.

This patch implements EH fast drain to handle such situations.  It
gives in-flight commands some time to finish up but doesn't wait for
too long.  After EH is scheduled, fast drain timer is started and if
no other completion occurs in ATA_EH_FASTDRAIN_INTERVAL all in-flight
commands are aborted.  If any completion occurred in the interval, the
port is given another interval to finish up itself.

Currently ATA_EH_FASTDRAIN_INTERVAL is 3 secs which should be enough
for finishing up most commands.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 74800ad6d81f..be5a43928c84 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -565,6 +565,9 @@ struct ata_port {
 	pm_message_t		pm_mesg;
 	int			*pm_result;
 
+	struct timer_list	fastdrain_timer;
+	unsigned long		fastdrain_cnt;
+
 	void			*private_data;
 
 #ifdef CONFIG_ATA_ACPI
-- 
cgit v1.2.3


From d046943cbaf332f75284ad99f4b3e60bae7ffff2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Fri, 20 Jul 2007 16:18:06 +0100
Subject: fix gfp_t annotations for slub

	Since we have uses like ~SLUB_DMA, we ought to have the type
set right in both cases.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07f7e4cbcee3..124270df8734 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -160,7 +160,7 @@ static inline struct kmem_cache *kmalloc_slab(size_t size)
 #define SLUB_DMA __GFP_DMA
 #else
 /* Disable DMA functionality */
-#define SLUB_DMA 0
+#define SLUB_DMA (__force gfp_t)0
 #endif
 
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
-- 
cgit v1.2.3


From eb0645a8b1f14da300f40bb9f424640cd1181fbf Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 20 Jul 2007 00:31:46 -0700
Subject: async_tx: fix kmap_atomic usage in async_memcpy

Andrew Morton:
	[async_memcpy] is very wrong if both ASYNC_TX_KMAP_DST and
	ASYNC_TX_KMAP_SRC can ever be set.  We'll end up using the same kmap
	slot for both src and dest and we get either corrupted data or a BUG.

Evgeniy Polyakov:
	Btw, shouldn't it always be kmap_atomic() even if flag is not set.
	That pages are usual one returned by alloc_page().

So fix the usage of kmap_atomic and kill the ASYNC_TX_KMAP_DST and
ASYNC_TX_KMAP_SRC flags.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/async_tx.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index ff1255079fa1..bdca3f1b3213 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -51,10 +51,6 @@ struct dma_chan_ref {
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
  * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
- * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
- * take an atomic mapping (KM_USER0) on the source page(s)
- * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
- * take an atomic mapping (KM_USER0) on the dest page(s)
  */
 enum async_tx_flags {
 	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
@@ -62,8 +58,6 @@ enum async_tx_flags {
 	ASYNC_TX_ASSUME_COHERENT = (1 << 2),
 	ASYNC_TX_ACK		 = (1 << 3),
 	ASYNC_TX_DEP_ACK	 = (1 << 4),
-	ASYNC_TX_KMAP_SRC	 = (1 << 5),
-	ASYNC_TX_KMAP_DST	 = (1 << 6),
 };
 
 #ifdef CONFIG_DMA_ENGINE
-- 
cgit v1.2.3


From 0aa366f351d044703e25c8425e508170e80d83b1 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 20 Jul 2007 11:22:30 -0700
Subject: [IA64] Convert to generic timekeeping/clocksource

This is a merge of Peter Keilty's initial patch (which was
revived by Bob Picco) for this with Hidetoshi Seto's fixes
and scaling improvements.

Acked-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/clocksource.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index bf297b03a4e4..16ea3374dddf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -67,6 +67,12 @@ struct clocksource {
 	unsigned long flags;
 	cycle_t (*vread)(void);
 	void (*resume)(void);
+#ifdef CONFIG_IA64
+	void *fsys_mmio;        /* used by fsyscall asm code */
+#define CLKSRC_FSYS_MMIO_SET(mmio, addr)      ((mmio) = (addr))
+#else
+#define CLKSRC_FSYS_MMIO_SET(mmio, addr)      do { } while (0)
+#endif
 
 	/* timekeeping specific data, ignore */
 	cycle_t cycle_interval;
-- 
cgit v1.2.3


From 1f564ad6d4182859612cbae452122e5eb2d62a76 Mon Sep 17 00:00:00 2001
From: Bob Picco <bob.picco@hp.com>
Date: Wed, 18 Jul 2007 15:51:28 -0700
Subject: [IA64] remove time interpolator

Remove time_interpolator code (this is generic code, but its
only user was ia64.  It has been superseded by the
CONFIG_GENERIC_TIME code).

Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Peter Keilty <peter.keilty@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/timex.h | 60 ---------------------------------------------------
 1 file changed, 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index da929dbbea2a..37ac3ff90faf 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -224,66 +224,6 @@ static inline int ntp_synced(void)
 	__x < 0 ? -(-__x >> __s) : __x >> __s;	\
 })
 
-
-#ifdef CONFIG_TIME_INTERPOLATION
-
-#define TIME_SOURCE_CPU 0
-#define TIME_SOURCE_MMIO64 1
-#define TIME_SOURCE_MMIO32 2
-#define TIME_SOURCE_FUNCTION 3
-
-/* For proper operations time_interpolator clocks must run slightly slower
- * than the standard clock since the interpolator may only correct by having
- * time jump forward during a tick. A slower clock is usually a side effect
- * of the integer divide of the nanoseconds in a second by the frequency.
- * The accuracy of the division can be increased by specifying a shift.
- * However, this may cause the clock not to be slow enough.
- * The interpolator will self-tune the clock by slowing down if no
- * resets occur or speeding up if the time jumps per analysis cycle
- * become too high.
- *
- * Setting jitter compensates for a fluctuating timesource by comparing
- * to the last value read from the timesource to insure that an earlier value
- * is not returned by a later call. The price to pay
- * for the compensation is that the timer routines are not as scalable anymore.
- */
-
-struct time_interpolator {
-	u16 source;			/* time source flags */
-	u8 shift;			/* increases accuracy of multiply by shifting. */
-				/* Note that bits may be lost if shift is set too high */
-	u8 jitter;			/* if set compensate for fluctuations */
-	u32 nsec_per_cyc;		/* set by register_time_interpolator() */
-	void *addr;			/* address of counter or function */
-	cycles_t mask;			/* mask the valid bits of the counter */
-	unsigned long offset;		/* nsec offset at last update of interpolator */
-	u64 last_counter;		/* counter value in units of the counter at last update */
-	cycles_t last_cycle;		/* Last timer value if TIME_SOURCE_JITTER is set */
-	u64 frequency;			/* frequency in counts/second */
-	long drift;			/* drift in parts-per-million (or -1) */
-	unsigned long skips;		/* skips forward */
-	unsigned long ns_skipped;	/* nanoseconds skipped */
-	struct time_interpolator *next;
-};
-
-extern void register_time_interpolator(struct time_interpolator *);
-extern void unregister_time_interpolator(struct time_interpolator *);
-extern void time_interpolator_reset(void);
-extern unsigned long time_interpolator_get_offset(void);
-extern void time_interpolator_update(long delta_nsec);
-
-#else /* !CONFIG_TIME_INTERPOLATION */
-
-static inline void time_interpolator_reset(void)
-{
-}
-
-static inline void time_interpolator_update(long delta_nsec)
-{
-}
-
-#endif /* !CONFIG_TIME_INTERPOLATION */
-
 #define TICK_LENGTH_SHIFT	32
 
 #ifdef CONFIG_NO_HZ
-- 
cgit v1.2.3


From 1d4ec7b1d6f130818f9b62dea3411d9ee2ff6ff6 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Fri, 20 Jul 2007 12:13:20 -0700
Subject: Fix ZERO_OR_NULL_PTR(ZERO_SIZE_PTR)

The comparison with ZERO_SIZE_PTR in ZERO_OR_NULL_PTR() needs to be <=
(not just <) so that ZERO_OR_NULL_PTR(ZERO_SIZE_PTR) is 1.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
[ Duh!  - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7d0ecc1659f0..d859354b9e51 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -40,7 +40,7 @@
  */
 #define ZERO_SIZE_PTR ((void *)16)
 
-#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) < \
+#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
 				(unsigned long)ZERO_SIZE_PTR)
 
 /*
-- 
cgit v1.2.3


From 8e68e2f248332a9c3fd4f08258f488c209bd3e0c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Jul 2007 21:39:47 +0200
Subject: [CELL] spufs: extension of spu_create to support affinity definition

This patch adds support for additional flags at spu_create, which relate
to the establishment of affinity between contexts and contexts to memory.
A fourth, optional, parameter is supported. This parameter represents
an affinity neighbor of the context being created, and is used when defining
SPU-SPU affinity.
Affinity is represented as a doubly linked list of spu_contexts.

Signed-off-by: Andre Detsch <adetsch@br.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7a8b1e3322e0..61def7c8fbb3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
 asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode);
+		unsigned int flags, mode_t mode, int fd);
 
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
 			    unsigned dev);
-- 
cgit v1.2.3


From 1474855d0878cced6f39f51f3c2bd7428b44cb1e Mon Sep 17 00:00:00 2001
From: Bob Nelson <rrnelson@linux.vnet.ibm.com>
Date: Fri, 20 Jul 2007 21:39:53 +0200
Subject: [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs

From: Maynard Johnson <mpjohn@us.ibm.com>

This patch updates the existing arch/powerpc/oprofile/op_model_cell.c
to add in the SPU profiling capabilities.  In addition, a 'cell' subdirectory
was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code.
Exports spu_set_profile_private_kref and spu_get_profile_private_kref which
are used by OProfile to store private profile information in spufs data
structures.

Also incorporated several fixes from other patches (rrn).  Check pointer
returned from kzalloc.  Eliminated unnecessary cast.  Better error
handling and cleanup in the related area.  64-bit unsigned long parameter
was being demoted to 32-bit unsigned int and eventually promoted back to
unsigned long.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Bob Nelson <rrnelson@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/dcookies.h |  1 +
 include/linux/elf-em.h   |  3 ++-
 include/linux/oprofile.h | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index 0fe7cdf326f7..98c69ab80c84 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -12,6 +12,7 @@
 
 #ifdef CONFIG_PROFILING
  
+#include <linux/dcache.h>
 #include <linux/types.h>
  
 struct dcookie_user;
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 0311bad838b1..5834e843a946 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -20,7 +20,8 @@
 #define EM_PARISC	15	/* HPPA */
 #define EM_SPARC32PLUS	18	/* Sun's "v8plus" */
 #define EM_PPC		20	/* PowerPC */
-#define EM_PPC64	21       /* PowerPC64 */
+#define EM_PPC64	21	 /* PowerPC64 */
+#define EM_SPU		23	/* Cell BE SPU */
 #define EM_SH		42	/* SuperH */
 #define EM_SPARCV9	43	/* SPARC v9 64-bit */
 #define EM_IA_64	50	/* HP/Intel IA-64 */
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 0d514b252454..041bb31100f4 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -17,6 +17,26 @@
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
  
+/* Each escaped entry is prefixed by ESCAPE_CODE
+ * then one of the following codes, then the
+ * relevant data.
+ * These #defines live in this file so that arch-specific
+ * buffer sync'ing code can access them.
+ */
+#define ESCAPE_CODE			~0UL
+#define CTX_SWITCH_CODE			1
+#define CPU_SWITCH_CODE			2
+#define COOKIE_SWITCH_CODE		3
+#define KERNEL_ENTER_SWITCH_CODE	4
+#define KERNEL_EXIT_SWITCH_CODE		5
+#define MODULE_LOADED_CODE		6
+#define CTX_TGID_CODE			7
+#define TRACE_BEGIN_CODE		8
+#define TRACE_END_CODE			9
+#define XEN_ENTER_SWITCH_CODE		10
+#define SPU_PROFILING_CODE		11
+#define SPU_CTX_SWITCH_CODE		12
+
 struct super_block;
 struct dentry;
 struct file_operations;
@@ -35,6 +55,14 @@ struct oprofile_operations {
 	int (*start)(void);
 	/* Stop delivering interrupts. */
 	void (*stop)(void);
+	/* Arch-specific buffer sync functions.
+	 * Return value = 0:  Success
+	 * Return value = -1: Failure
+	 * Return value = 1:  Run generic sync function
+	 */
+	int (*sync_start)(void);
+	int (*sync_stop)(void);
+
 	/* Initiate a stack backtrace. Optional. */
 	void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
 	/* CPU identification string. */
@@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops);
  */
 void oprofile_arch_exit(void);
 
+/**
+ * Add data to the event buffer.
+ * The data passed is free-form, but typically consists of
+ * file offsets, dcookies, context information, and ESCAPE codes.
+ */
+void add_event_entry(unsigned long data);
+
 /**
  * Add a sample. This may be called from any context. Pass
  * smp_processor_id() as cpu.
-- 
cgit v1.2.3


From 3d58ffe2aa107df6db57f875dba5368960b17cde Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 18 Jul 2007 18:41:08 -0300
Subject: V4L/DVB (5867): videodev2.h: add missing <sys/time.h> for userspace

When videodev2.h is included by an application, it needs to include
<sys/time.h> for the timeval struct.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index d16a2b57dc81..c66c8a3410b9 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -60,6 +60,7 @@
 #include <linux/compiler.h> /* need __user */
 #else
 #define __user
+#include <sys/time.h>
 #endif
 #include <linux/types.h>
 
-- 
cgit v1.2.3


From 31ce72a6b1c7635259cf522459539c0611f2c50c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 20 Jul 2007 19:45:45 -0700
Subject: [NET]: Fix loopback crashes when multiqueue is enabled.

From: Patrick McHardy <kaber@trash.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9820ca1e45e2..4a616d73cc25 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -575,7 +575,7 @@ struct net_device
 
 	/* The TX queue control structures */
 	unsigned int			egress_subqueue_count;
-	struct net_device_subqueue	egress_subqueue[0];
+	struct net_device_subqueue	egress_subqueue[1];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
-- 
cgit v1.2.3


From 39dca558a5b52b63e49bc234a7e887be092aa690 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Fri, 20 Jul 2007 18:22:17 -0500
Subject: [SCSI] bsg: make class backlinks

Currently, bsg doesn't make class backlinks (a process whereby you'd get
a link to bsg in the device directory in the same way you get one for
sg).  This is because the bsg device is uninitialised, so the class
device has nothing it can attach to.  The fix is to make the bsg device
point to the cdevice of the entity creating the bsg, necessitating
changing the bsg_register_queue() prototype into a form that takes the
generic device.

Acked-by: FUJITA Tomonori <tomof@acm.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/bsg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 8547b10c388b..f415f89e0ac8 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -57,10 +57,10 @@ struct bsg_class_device {
 	struct request_queue *queue;
 };
 
-extern int bsg_register_queue(struct request_queue *, const char *);
+extern int bsg_register_queue(struct request_queue *, struct device *, const char *);
 extern void bsg_unregister_queue(struct request_queue *);
 #else
-#define bsg_register_queue(disk, name)		(0)
+#define bsg_register_queue(disk, dev, name)		(0)
 #define bsg_unregister_queue(disk)	do { } while (0)
 #endif
 
-- 
cgit v1.2.3


From d3fec424b23c47686efcf3f2004c3f1c1cee4d9c Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Sat, 21 Jul 2007 04:37:26 -0700
Subject: coda: remove CODA_STORE/CODA_RELEASE upcalls

This is a variation on the patch sent by Christoph Hellwig which kills
file_count abuse by the Coda kernel module by moving the coda_flush
functionality into coda_release.  However, part of the reason we were using
the coda_flush callback was to allow Coda to pass errors that occur during
writeback from the userspace cache manager back to close().

As Al Viro explained on linux-fsdevel, it is impossible to guarantee that
such errors can in fact be returned back to the caller.  There are many
cases where the last reference to a file is not released by the close
system call and it is also impossible to pick some close as a 'last-close'
and delay it until all other references have been destroyed.

The CODA_STORE/CODA_RELEASE upcall combination is clearly a broken design,
and it is better to remove it completely.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@ftp.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coda_linux.h | 1 -
 include/linux/coda_psdev.h | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index c4079b403e9e..1c47a34aa794 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -36,7 +36,6 @@ extern const struct file_operations coda_ioctl_operations;
 
 /* operations shared over more than one file */
 int coda_open(struct inode *i, struct file *f);
-int coda_flush(struct file *f, fl_owner_t id);
 int coda_release(struct inode *i, struct file *f);
 int coda_permission(struct inode *inode, int mask, struct nameidata *nd);
 int coda_revalidate_inode(struct dentry *);
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index aa8f454b3b77..07ae8f846055 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -33,9 +33,6 @@ int venus_setattr(struct super_block *, struct CodaFid *, struct coda_vattr *);
 int venus_lookup(struct super_block *sb, struct CodaFid *fid, 
 		 const char *name, int length, int *type, 
 		 struct CodaFid *resfid);
-int venus_store(struct super_block *sb, struct CodaFid *fid, int flags,
-		vuid_t uid);
-int venus_release(struct super_block *sb, struct CodaFid *fid, int flags);
 int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
 		vuid_t uid);
 int venus_open(struct super_block *sb, struct CodaFid *fid, int flags,
-- 
cgit v1.2.3


From 93da56efcf8c6a111f0349f6b7651172d4745ca0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 21 Jul 2007 04:37:33 -0700
Subject: clockevents: remove prototypes of removed functions

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/clockchips.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8486e78f7335..8d7a39019ace 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -119,10 +119,6 @@ extern void clockevents_register_device(struct clock_event_device *dev);
 
 extern void clockevents_exchange_device(struct clock_event_device *old,
 					struct clock_event_device *new);
-extern
-struct clock_event_device *clockevents_request_device(unsigned int features,
-						      cpumask_t cpumask);
-extern void clockevents_release_device(struct clock_event_device *dev);
 extern void clockevents_set_mode(struct clock_event_device *dev,
 				 enum clock_event_mode mode);
 extern int clockevents_register_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 18de5bc4c1f1f1fa5e14f354a7603bd6e9d4e3b6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 21 Jul 2007 04:37:34 -0700
Subject: clockevents: fix resume logic

We need to make sure that the clockevent devices are resumed before
the tick is resumed. The current resume logic does not guarantee this.

Add CLOCK_EVT_MODE_RESUME and call the set mode functions of the clock
event devices before resuming the tick / oneshot functionality.

Fixup the existing users.

Thanks to Nigel Cunningham for tracking down a long standing thinko,
which affected the jinxed VAIO.

[akpm@linux-foundation.org: xen build fix]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/clockchips.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8d7a39019ace..e0bd46eb2414 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -23,6 +23,7 @@ enum clock_event_mode {
 	CLOCK_EVT_MODE_SHUTDOWN,
 	CLOCK_EVT_MODE_PERIODIC,
 	CLOCK_EVT_MODE_ONESHOT,
+	CLOCK_EVT_MODE_RESUME,
 };
 
 /* Clock event notification values */
-- 
cgit v1.2.3


From 82644459c592a28a3eab682f9b88d81019ddfe8b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 21 Jul 2007 04:37:37 -0700
Subject: NTP: move the cmos update code into ntp.c

i386 and sparc64 have the identical code to update the cmos clock.  Move it
into kernel/time/ntp.c as there are other architectures coming along with the
same requirements.

[akpm@linux-foundation.org: build fixes]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: David Miller <davem@davemloft.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index ec3b0ced0afe..e6aea5146e5d 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #ifdef __KERNEL__
+# include <linux/cache.h>
 # include <linux/seqlock.h>
 #endif
 
@@ -94,6 +95,8 @@ extern struct timespec wall_to_monotonic;
 extern seqlock_t xtime_lock __attribute__((weak));
 
 extern unsigned long read_persistent_clock(void);
+extern int update_persistent_clock(struct timespec now);
+extern int no_sync_cmos_clock __read_mostly;
 void timekeeping_init(void);
 
 static inline unsigned long get_seconds(void)
-- 
cgit v1.2.3


From ae2c6dcf90c5a9ff9bd9a176cafd43a255fcc64b Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Sat, 21 Jul 2007 17:09:56 +0200
Subject: x86_64: various cleanups in NUMA scan node

In acpi_scan_nodes(), we immediately return -1 if acpi_numa <= 0, meaning
we haven't detected any underlying ACPI topology or we have explicitly
disabled its use from the command-line with numa=noacpi.

acpi_table_print_srat_entry() and acpi_table_parse_srat() are only
referenced within drivers/acpi/numa.c, so we can mark them as static and
remove their prototypes from the header file.

Likewise, pxm_to_node_map[] and node_to_pxm_map[] are only used within
drivers/acpi/numa.c, so we mark them as static and remove their externs
from the header file.

The automatic 'result' variable is unused in acpi_numa_init(), so it's
removed.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dc234c508a6f..e88b62e6b3aa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -88,10 +88,8 @@ int acpi_table_parse (char *id, acpi_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
 	int entry_id, acpi_table_entry_handler handler, unsigned int max_entries);
 int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler handler, unsigned int max_entries);
-int acpi_table_parse_srat (enum acpi_srat_type id, acpi_table_entry_handler handler, unsigned int max_entries);
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
-void acpi_table_print_srat_entry (struct acpi_subtable_header *srat);
 
 /* the following four functions are architecture-dependent */
 #ifdef CONFIG_HAVE_ARCH_PARSE_SRAT
-- 
cgit v1.2.3


From a586df067afe0580bb02b7a6312ca2afe49bba03 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Jul 2007 17:10:00 +0200
Subject: x86: Support __attribute__((__cold__)) in gcc 4.3

gcc 4.3 supports a new __attribute__((__cold__)) to mark functions cold. Any
path directly leading to a call of this function will be unlikely. And gcc
will try to generate smaller code for the function itself.

Please use with care. The code generation advantage isn't large and in most
cases it is not worth uglifying code with this.

This patch marks some common error functions like panic(), printk()
as cold.  This will longer term make many unlikely()s unnecessary, although
we can keep them for now for older compilers.

BUG is not marked cold because there is currently no way to tell
gcc to mark an inline function cold.

Also all __init and __exit functions are marked cold. With a non -Os
build this will tell the compiler to generate slightly smaller code
for them. I think it currently only uses less alignments for labels,
but that might change in the future.

One disadvantage over *likely() is that they cannot be easily instrumented
to verify them.

Another drawback is that only the latest gcc 4.3 snapshots support this.
Unfortunately we cannot detect this using the preprocessor. This means older
snapshots will fail now. I don't think that's a problem because they are
unreleased compilers that nobody should be using.

gcc also has a __hot__ attribute, but I don't see any sense in using
this in the kernel right now. But someday I hope gcc will be able
to use more aggressive optimizing for hot functions even in -Os,
if that happens it should be added.

Includes compile fix from Thomas Gleixner.

Cc: Jan Hubicka <jh@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 18 ++++++++++++++++++
 include/linux/compiler.h      |  9 +++++++++
 include/linux/init.h          |  8 ++++----
 include/linux/kernel.h        |  8 ++++----
 4 files changed, 35 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index a03e9398a6c2..14f7494280f0 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -23,3 +23,21 @@
  * code
  */
 #define uninitialized_var(x) x = x
+
+#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+/* Mark functions as cold. gcc will assume any path leading to a call
+   to them will be unlikely.  This means a lot of manual unlikely()s
+   are unnecessary now for any paths leading to the usual suspects
+   like BUG(), printk(), panic() etc. [but let's keep them for now for
+   older compilers]
+
+   Early snapshots of gcc 4.3 don't support this and we can't detect this
+   in the preprocessor, but we can live with this because they're unreleased.
+   Maketime probing would be overkill here.
+
+   gcc also has a __attribute__((__hot__)) to move hot functions into
+   a special section, but I don't see any sense in this right now in
+   the kernel context */
+#define __cold			__attribute__((__cold__))
+
+#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8287a72bb6a9..12a1291855e2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -174,4 +174,13 @@ extern void __chk_io_ptr(const void __iomem *);
 # define __attribute_const__	/* unimplemented */
 #endif
 
+/*
+ * Tell gcc if a function is cold. The compiler will assume any path
+ * directly leading to the call is unlikely.
+ */
+
+#ifndef __cold
+#define __cold
+#endif
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index 5b5285316339..f0d0e3295a9b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,10 +40,10 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__attribute__ ((__section__ (".init.text")))
+#define __init		__attribute__ ((__section__ (".init.text"))) __cold
 #define __initdata	__attribute__ ((__section__ (".init.data")))
 #define __exitdata	__attribute__ ((__section__(".exit.data")))
-#define __exit_call	__attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
+#define __exit_call	__attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold
 
 /* modpost check for section mismatches during the kernel build.
  * A section mismatch happens when there are references from a
@@ -59,9 +59,9 @@
 #define __initdata_refok          __attribute__ ((__section__ (".data.init.refok")))
 
 #ifdef MODULE
-#define __exit		__attribute__ ((__section__(".exit.text")))
+#define __exit		__attribute__ ((__section__(".exit.text"))) __cold
 #else
-#define __exit		__attribute_used__ __attribute__ ((__section__(".exit.text")))
+#define __exit		__attribute_used__ __attribute__ ((__section__(".exit.text"))) __cold
 #endif
 
 /* For assembly routines */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1eb9cde550c4..4300bb462d29 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -106,7 +106,7 @@ extern int cond_resched(void);
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
-	__attribute__ ((NORET_AND format (printf, 1, 2)));
+	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 extern int oops_may_print(void);
@@ -155,14 +155,14 @@ extern void dump_thread(struct pt_regs *regs, struct user *dump);
 asmlinkage int vprintk(const char *fmt, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
 asmlinkage int printk(const char * fmt, ...)
-	__attribute__ ((format (printf, 1, 2)));
+	__attribute__ ((format (printf, 1, 2))) __cold;
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
 static inline int vprintk(const char *s, va_list args) { return 0; }
 static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
-static inline int printk(const char *s, ...) { return 0; }
+static inline int __cold printk(const char *s, ...) { return 0; }
 #endif
 
 unsigned long int_sqrt(unsigned long);
@@ -212,7 +212,7 @@ extern enum system_states {
 #define TAINT_USER			(1<<6)
 #define TAINT_DIE			(1<<7)
 
-extern void dump_stack(void);
+extern void dump_stack(void) __cold;
 
 enum {
 	DUMP_PREFIX_NONE,
-- 
cgit v1.2.3


From 3484d79813707bb6045773953a809abba443dc20 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Sat, 21 Jul 2007 17:10:32 +0200
Subject: x86_64: fake pxm-to-node mapping for fake numa

For NUMA emulation, our SLIT should represent the true NUMA topology of the
system but our proximity domain to node ID mapping needs to reflect the
emulated state.

When NUMA emulation has successfully setup fake nodes on the system, a new
function, acpi_fake_nodes() is called.  This function determines the proximity
domain (_PXM) for each true node found on the system.  It then finds which
emulated nodes have been allocated on this true node as determined by its
starting address.  The node ID to PXM mapping is changed so that each fake
node ID points to the PXM of the true node that it is located on.

If the machine failed to register a SLIT, then we assume there is no special
requirement for emulated node affinity so we use the default LOCAL_DISTANCE,
which is newly exported to this code, as our measurement if the emulated nodes
appear in the same PXM.  Otherwise, we use REMOTE_DISTANCE.

PXM_INVAL and NID_INVAL are also exported to the ACPI header file so that we
can compare node_to_pxm() results in generic code (in this case, the SRAT
code).

Cc: Len Brown <lenb@kernel.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e88b62e6b3aa..d5680cd7746a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -231,6 +231,9 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
 
 extern int pnpacpi_disabled;
 
+#define PXM_INVAL	(-1)
+#define NID_INVAL	(-1)
+
 #else	/* CONFIG_ACPI */
 
 static inline int acpi_boot_init(void)
-- 
cgit v1.2.3


From 44bf4cea43816d43deab73c1c16361e899996eaa Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <nigel@nigel.suspend2.net>
Date: Sat, 21 Jul 2007 17:10:41 +0200
Subject: x86: PM_TRACE support

Signed-off-by: Nigel Cunningham <nigel@nigel.suspend2.net>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/resume-trace.h | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index 81e9299ca148..f3f4f28c6960 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -2,6 +2,7 @@
 #define RESUME_TRACE_H
 
 #ifdef CONFIG_PM_TRACE
+#include <asm/resume-trace.h>
 
 extern int pm_trace_enabled;
 
@@ -9,20 +10,10 @@ struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(void *tracedata, unsigned int user);
 
-#define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do {					\
-	if (pm_trace_enabled) {					\
-		void *tracedata;				\
-		asm volatile("movl $1f,%0\n"			\
-			".section .tracedata,\"a\"\n"		\
-			"1:\t.word %c1\n"			\
-			"\t.long %c2\n"				\
-			".previous"				\
-			:"=r" (tracedata)			\
-			: "i" (__LINE__), "i" (__FILE__));	\
-		generate_resume_trace(tracedata, user);		\
-	}							\
-} while (0)
+#define TRACE_DEVICE(dev) do { \
+	if (pm_trace_enabled) \
+		set_trace_device(dev); \
+	} while(0)
 
 #else
 
-- 
cgit v1.2.3


From 9585116ba09f1d8c52d0a1346e20bb9d443e9c02 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sat, 21 Jul 2007 17:11:35 +0200
Subject: i386: fix iounmap's use of vm_struct's size field

get_vm_area always returns an area with an adjacent guard page.  That guard
page is included in vm_struct.size.  iounmap uses vm_struct.size to
determine how much address space needs to have change_page_attr applied to
it, which will BUG if applied to the guard page.

This patch adds a helper function - get_vm_area_size() in linux/vmalloc.h -
to return the actual size of a vm area, and uses it to make iounmap do the
right thing.  There are probably other places which should be using
get_vm_area_size().

Thanks to Dave Young <hidave.darkstar@gmail.com> for debugging the
problem.

[ Andi, it wasn't clear to me whether x86_64 needs the same fix. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c2b10cae5da5..89338b468d0d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -58,6 +58,13 @@ void vmalloc_sync_all(void);
 /*
  *	Lowlevel-APIs (not for driver use!)
  */
+
+static inline size_t get_vm_area_size(const struct vm_struct *area)
+{
+	/* return actual size without guard page */
+	return area->size - PAGE_SIZE;
+}
+
 extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
-- 
cgit v1.2.3


From e97e2ddf07d6b6c2d621ddaec277e19f86c0cdb1 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Sat, 21 Jul 2007 19:07:33 -0700
Subject: [IrDA]: EP7211 IR driver port to the latest SIR API

The EP7211 SIR driver was the only one left without a new SIR API port.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/irda.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irda.h b/include/linux/irda.h
index 8e3735714c1c..28f88ecba344 100644
--- a/include/linux/irda.h
+++ b/include/linux/irda.h
@@ -77,6 +77,7 @@ typedef enum {
 	IRDA_ACT200L_DONGLE      = 10,
 	IRDA_MA600_DONGLE        = 11,
 	IRDA_TOIM3232_DONGLE     = 12,
+	IRDA_EP7211_DONGLE       = 13,
 } IRDA_DONGLE;
 
 /* Protocol types to be used for SOCK_DGRAM */
-- 
cgit v1.2.3


From d5a2f2f1d68e2da538ac28540cddd9ccc733b001 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Wed, 18 Jul 2007 23:45:42 -0300
Subject: ACPI: thinkpad-acpi: store ThinkPad model information

Keep note of ThinkPad model, BIOS and EC firmware information, and log it
on startup.  Makes for far more readable code in places, too.

This patch also adds Lenovo's PCI ID to the pci ids table.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b15c6498fe67..ced4d3f76104 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2040,6 +2040,8 @@
 #define PCI_DEVICE_ID_ALTIMA_AC9100	0x03ea
 #define PCI_DEVICE_ID_ALTIMA_AC1003	0x03eb
 
+#define PCI_VENDOR_ID_LENOVO		0x17aa
+
 #define PCI_VENDOR_ID_ARECA		0x17d3
 #define PCI_DEVICE_ID_ARECA_1110	0x1110
 #define PCI_DEVICE_ID_ARECA_1120	0x1120
-- 
cgit v1.2.3


From 8bf8df7120006b8c97ad3a9fcc79e2ba894c46dd Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Sun, 22 Jul 2007 00:23:03 +1000
Subject: [POWERPC] Constify of_platform_driver name

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/of_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 5fd44e63fb26..22c3837784b2 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -31,7 +31,7 @@ extern struct bus_type of_platform_bus_type;
  */
 struct of_platform_driver
 {
-	char			*name;
+	const char		*name;
 	struct of_device_id	*match_table;
 	struct module		*owner;
 
-- 
cgit v1.2.3


From 51d261122d0ffac8cf91cc6e74ffcfea23faeb1c Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Sun, 22 Jul 2007 00:27:01 +1000
Subject: [POWERPC] Constify of_platform_driver match_table

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/of_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 22c3837784b2..448f70b30a0c 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -32,7 +32,7 @@ extern struct bus_type of_platform_bus_type;
 struct of_platform_driver
 {
 	const char		*name;
-	struct of_device_id	*match_table;
+	const struct of_device_id	*match_table;
 	struct module		*owner;
 
 	int	(*probe)(struct of_device* dev,
-- 
cgit v1.2.3


From 41e1703b9b88cf9b5e91cdd2f7dcded3ec3917cb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <tomof@acm.org>
Date: Sun, 22 Jul 2007 10:06:50 +0900
Subject: [SCSI] bsg: unexport sg v3 helper functions

blk_fill_sghdr_rq, blk_unmap_sghdr_rq, and blk_complete_sghdr_rq were
exported for bsg, however bsg was changed to support only sg v4.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f78965fc6426..695e34964cb7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -698,11 +698,6 @@ extern int blk_execute_rq(request_queue_t *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
-extern int blk_fill_sghdr_rq(request_queue_t *, struct request *,
-		      struct sg_io_hdr *, int);
-extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *);
-extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *,
-			  struct bio *);
 extern int blk_verify_command(unsigned char *, int);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
-- 
cgit v1.2.3


From 74f2345b6be1410f824cb7dd638d2c10a9709379 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 4 Jun 2007 17:00:14 -0400
Subject: [PATCH] allow audit filtering on bit & operations

Right now the audit filter can match on = != > < >= blah blah blah.
This allows the filter to also look at bitwise AND operations, &

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8ca7ca0b47f0..a35859ab2fdb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -161,7 +161,7 @@
  * are currently used in an audit field constant understood by the kernel.
  * If you are adding a new #define AUDIT_<whatever>, please ensure that
  * AUDIT_UNUSED_BITS is updated if need be. */
-#define AUDIT_UNUSED_BITS	0x0FFFFC00
+#define AUDIT_UNUSED_BITS	0x07FFFC00
 
 
 /* Rule fields */
@@ -213,25 +213,29 @@
 #define AUDIT_NEGATE			0x80000000
 
 /* These are the supported operators.
- *	4  2  1
- *	=  >  <
- *	-------
- *	0  0  0		0	nonsense
- *	0  0  1		1	<
- *	0  1  0		2	>
- *	0  1  1		3	!=
- *	1  0  0		4	=
- *	1  0  1		5	<=
- *	1  1  0		6	>=
- *	1  1  1		7	all operators
+ *	4  2  1  8
+ *	=  >  <  ?
+ *	----------
+ *	0  0  0	 0	00	nonsense
+ *	0  0  0	 1	08	&  bit mask
+ *	0  0  1	 0	10	<
+ *	0  1  0	 0	20	>
+ *	0  1  1	 0	30	!=
+ *	1  0  0	 0	40	=
+ *	1  0  0	 1	48	&=  bit test
+ *	1  0  1	 0	50	<=
+ *	1  1  0	 0	60	>=
+ *	1  1  1	 1	78	all operators
  */
+#define AUDIT_BIT_MASK			0x08000000
 #define AUDIT_LESS_THAN			0x10000000
 #define AUDIT_GREATER_THAN		0x20000000
 #define AUDIT_NOT_EQUAL			0x30000000
 #define AUDIT_EQUAL			0x40000000
+#define AUDIT_BIT_TEST			(AUDIT_BIT_MASK|AUDIT_EQUAL)
 #define AUDIT_LESS_THAN_OR_EQUAL	(AUDIT_LESS_THAN|AUDIT_EQUAL)
 #define AUDIT_GREATER_THAN_OR_EQUAL	(AUDIT_GREATER_THAN|AUDIT_EQUAL)
-#define AUDIT_OPERATORS			(AUDIT_EQUAL|AUDIT_NOT_EQUAL)
+#define AUDIT_OPERATORS			(AUDIT_EQUAL|AUDIT_NOT_EQUAL|AUDIT_BIT_MASK)
 
 /* Status symbols */
 				/* Mask values */
-- 
cgit v1.2.3


From 4259fa01a2d2aa3e589b34ba7624080232d9c1ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Jun 2007 11:13:31 -0400
Subject: [PATCH] get rid of AVC_PATH postponed treatment

        Selinux folks had been complaining about the lack of AVC_PATH
records when audit is disabled.  I must admit my stupidity - I assumed
that avc_audit() really couldn't use audit_log_d_path() because of
deadlocks (== could be called with dcache_lock or vfsmount_lock held).
Shouldn't have made that assumption - it never gets called that way.
It _is_ called under spinlocks, but not those.

        Since audit_log_d_path() uses ab->gfp_mask for allocations,
kmalloc() in there is not a problem.  IOW, the simple fix is sufficient:
let's rip AUDIT_AVC_PATH out and simply generate pathname as part of main
record.  It's trivial to do.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: James Morris <jmorris@namei.org>
---
 include/linux/audit.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index a35859ab2fdb..4bbd8601b8f0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -411,7 +411,6 @@ extern int audit_bprm(struct linux_binprm *bprm);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
-extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
 extern int audit_set_macxattr(const char *name);
 extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
 extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
@@ -491,7 +490,6 @@ extern int audit_signals;
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_fd_pair(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
-#define audit_avc_path(dentry, mnt) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ({ 0; })
 #define audit_mq_timedsend(d,l,p,t) ({ 0; })
-- 
cgit v1.2.3


From abd4f7505bafdd6c5319fe3cb5caf9af6104e17a Mon Sep 17 00:00:00 2001
From: Masoud Asgharifard Sharbiani <masouds@google.com>
Date: Sun, 22 Jul 2007 11:12:28 +0200
Subject: x86: i386-show-unhandled-signals-v3

This patch makes the i386 behave the same way that x86_64 does when a
segfault happens.  A line gets printed to the kernel log so that tools
that need to check for failures can behave more uniformly between
i386 and x86_64.  The message can be disabled by setting the
debug.show_unhandled_signals sysctl variable to 0 (or by doing echo 0 >
/proc/sys/debug/exception-trace)

Also, all of the lines being printed are now using printk_ratelimit() to
deny the ability of DoS from a local user with a program like the
following:

main()
{
       while (1)
               if (!fork()) *(int *)0 = 0;
}

This new revision also includes the fix that Andrew did which got rid of
new sysctl that was added to the system in earlier versions of this.
Also, 'show-unhandled-signals' sysctl has been renamed back to the old
'exception-trace' to avoid breakage of people's scripts.

AK: Enabling by default for i386 will likely be controversial, but let's see what happens
AK: Really folks, before complaining just fix your segfaults
AK: I bet this will find a lot of silent issues

Signed-off-by: Masoud Sharbiani <masouds@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
[ Personally, I've found the complaints useful on x86-64, so I'm all for
  this. That said, I wonder if we could do it more prettily..   -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index ea91abe740da..0ae338866240 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -237,12 +237,15 @@ extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
+extern int show_unhandled_signals;
 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 
 extern struct kmem_cache *sighand_cachep;
 
+int unhandled_signal(struct task_struct *tsk, int sig);
+
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
  * or to the process as a whole (Linux thread group).  How the signal
-- 
cgit v1.2.3


From e9ed7e722e3f4cea07cf3c4bfe98c18180a17793 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 21 Jul 2007 23:29:12 +0100
Subject: take declarations of enable_irq() et.al. to linux/interrupt.h

Now that the last inlined instances are gone, all that is left to do
is turning disable_irq_nosync on arm26 and m68k from defines to aliases
and we are all set - we can make these externs in linux/interrupt.h
unconditional and kill remaining instances in asm/irq.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/interrupt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5323f6275854..0a3c2ebf2008 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -120,11 +120,11 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
 # define local_irq_enable_in_hardirq()	local_irq_enable()
 #endif
 
-#ifdef CONFIG_GENERIC_HARDIRQS
 extern void disable_irq_nosync(unsigned int irq);
 extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 
+#ifdef CONFIG_GENERIC_HARDIRQS
 /*
  * Special lockdep variants of irq disabling/enabling.
  * These should be used for locking constructs that
-- 
cgit v1.2.3